LLVM  17.0.0git
AArch64ISelLowering.cpp
Go to the documentation of this file.
1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AArch64TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64ISelLowering.h"
15 #include "AArch64ExpandImm.h"
17 #include "AArch64PerfectShuffle.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/ADT/Twine.h"
31 #include "llvm/Analysis/LoopInfo.h"
37 #include "llvm/CodeGen/Analysis.h"
53 #include "llvm/IR/Attributes.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/DataLayout.h"
56 #include "llvm/IR/DebugLoc.h"
57 #include "llvm/IR/DerivedTypes.h"
58 #include "llvm/IR/Function.h"
60 #include "llvm/IR/GlobalValue.h"
61 #include "llvm/IR/IRBuilder.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/Instructions.h"
64 #include "llvm/IR/IntrinsicInst.h"
65 #include "llvm/IR/Intrinsics.h"
66 #include "llvm/IR/IntrinsicsAArch64.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/OperandTraits.h"
69 #include "llvm/IR/PatternMatch.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/Value.h"
73 #include "llvm/MC/MCRegisterInfo.h"
75 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/CodeGen.h"
78 #include "llvm/Support/Compiler.h"
79 #include "llvm/Support/Debug.h"
82 #include "llvm/Support/KnownBits.h"
89 #include <algorithm>
90 #include <bitset>
91 #include <cassert>
92 #include <cctype>
93 #include <cstdint>
94 #include <cstdlib>
95 #include <iterator>
96 #include <limits>
97 #include <optional>
98 #include <tuple>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 using namespace llvm::PatternMatch;
104 
105 #define DEBUG_TYPE "aarch64-lower"
106 
107 STATISTIC(NumTailCalls, "Number of tail calls");
108 STATISTIC(NumShiftInserts, "Number of vector shift inserts");
109 STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
110 
111 // FIXME: The necessary dtprel relocations don't seem to be supported
112 // well in the GNU bfd and gold linkers at the moment. Therefore, by
113 // default, for now, fall back to GeneralDynamic code generation.
115  "aarch64-elf-ldtls-generation", cl::Hidden,
116  cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
117  cl::init(false));
118 
119 static cl::opt<bool>
120 EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
121  cl::desc("Enable AArch64 logical imm instruction "
122  "optimization"),
123  cl::init(true));
124 
125 // Temporary option added for the purpose of testing functionality added
126 // to DAGCombiner.cpp in D92230. It is expected that this can be removed
127 // in future when both implementations will be based off MGATHER rather
128 // than the GLD1 nodes added for the SVE gather load intrinsics.
129 static cl::opt<bool>
130 EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
131  cl::desc("Combine extends of AArch64 masked "
132  "gather intrinsics"),
133  cl::init(true));
134 
135 // All of the XOR, OR and CMP use ALU ports, and data dependency will become the
136 // bottleneck after this transform on high-end CPUs. So this maximum leaf-node
137 // limitation guards that the cmp+ccmp transform remains profitable.
138 static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
139  cl::desc("Maximum of xors"));
140 
141 /// Value type used for condition codes.
142 static const MVT MVT_CC = MVT::i32;
143 
144 static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2,
145  AArch64::X3, AArch64::X4, AArch64::X5,
146  AArch64::X6, AArch64::X7};
147 static const MCPhysReg FPRArgRegs[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
148  AArch64::Q3, AArch64::Q4, AArch64::Q5,
149  AArch64::Q6, AArch64::Q7};
150 
152 
154 
155 static inline EVT getPackedSVEVectorVT(EVT VT) {
156  switch (VT.getSimpleVT().SimpleTy) {
157  default:
158  llvm_unreachable("unexpected element type for vector");
159  case MVT::i8:
160  return MVT::nxv16i8;
161  case MVT::i16:
162  return MVT::nxv8i16;
163  case MVT::i32:
164  return MVT::nxv4i32;
165  case MVT::i64:
166  return MVT::nxv2i64;
167  case MVT::f16:
168  return MVT::nxv8f16;
169  case MVT::f32:
170  return MVT::nxv4f32;
171  case MVT::f64:
172  return MVT::nxv2f64;
173  case MVT::bf16:
174  return MVT::nxv8bf16;
175  }
176 }
177 
178 // NOTE: Currently there's only a need to return integer vector types. If this
179 // changes then just add an extra "type" parameter.
181  switch (EC.getKnownMinValue()) {
182  default:
183  llvm_unreachable("unexpected element count for vector");
184  case 16:
185  return MVT::nxv16i8;
186  case 8:
187  return MVT::nxv8i16;
188  case 4:
189  return MVT::nxv4i32;
190  case 2:
191  return MVT::nxv2i64;
192  }
193 }
194 
// Map a scalable predicate vector type (nxvNi1) to the integer vector type
// with the same element count whose elements are the promoted (widened) form
// of the i1 lanes, e.g. nxv4i1 -> nxv4i32. Element counts not listed here
// are a programming error.
195 static inline EVT getPromotedVTForPredicate(EVT VT) {
 197  "Expected scalable predicate vector type!");
 198  switch (VT.getVectorMinNumElements()) {
 199  default:
 200  llvm_unreachable("unexpected element count for vector");
 201  case 2:
 202  return MVT::nxv2i64;
 203  case 4:
 204  return MVT::nxv4i32;
 205  case 8:
 206  return MVT::nxv8i16;
 207  case 16:
 208  return MVT::nxv16i8;
 209  }
 210 }
211 
212 /// Returns true if VT's elements occupy the lowest bit positions of its
213 /// associated register class without any intervening space.
214 ///
215 /// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
216 /// same register class, but only nxv8f16 can be treated as a packed vector.
217 static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
218  assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
219  "Expected legal vector type!");
220  return VT.isFixedLengthVector() ||
222 }
223 
224 // Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
225 // predicate and end with a passthru value matching the result type.
226 static bool isMergePassthruOpcode(unsigned Opc) {
227  switch (Opc) {
228  default:
229  return false;
259  return true;
260  }
261 }
262 
263 // Returns true if inactive lanes are known to be zeroed by construction.
265  switch (Op.getOpcode()) {
266  default:
267  // We guarantee i1 splat_vectors to zero the other lanes by
268  // implementing it with ptrue and possibly a punpklo for nxv1i1.
269  if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
270  return true;
271  return false;
272  case AArch64ISD::PTRUE:
274  return true;
276  switch (Op.getConstantOperandVal(0)) {
277  default:
278  return false;
279  case Intrinsic::aarch64_sve_ptrue:
280  case Intrinsic::aarch64_sve_pnext:
281  case Intrinsic::aarch64_sve_cmpeq:
282  case Intrinsic::aarch64_sve_cmpne:
283  case Intrinsic::aarch64_sve_cmpge:
284  case Intrinsic::aarch64_sve_cmpgt:
285  case Intrinsic::aarch64_sve_cmphs:
286  case Intrinsic::aarch64_sve_cmphi:
287  case Intrinsic::aarch64_sve_cmpeq_wide:
288  case Intrinsic::aarch64_sve_cmpne_wide:
289  case Intrinsic::aarch64_sve_cmpge_wide:
290  case Intrinsic::aarch64_sve_cmpgt_wide:
291  case Intrinsic::aarch64_sve_cmplt_wide:
292  case Intrinsic::aarch64_sve_cmple_wide:
293  case Intrinsic::aarch64_sve_cmphs_wide:
294  case Intrinsic::aarch64_sve_cmphi_wide:
295  case Intrinsic::aarch64_sve_cmplo_wide:
296  case Intrinsic::aarch64_sve_cmpls_wide:
297  case Intrinsic::aarch64_sve_fcmpeq:
298  case Intrinsic::aarch64_sve_fcmpne:
299  case Intrinsic::aarch64_sve_fcmpge:
300  case Intrinsic::aarch64_sve_fcmpgt:
301  case Intrinsic::aarch64_sve_fcmpuo:
302  case Intrinsic::aarch64_sve_facgt:
303  case Intrinsic::aarch64_sve_facge:
304  case Intrinsic::aarch64_sve_whilege:
305  case Intrinsic::aarch64_sve_whilegt:
306  case Intrinsic::aarch64_sve_whilehi:
307  case Intrinsic::aarch64_sve_whilehs:
308  case Intrinsic::aarch64_sve_whilele:
309  case Intrinsic::aarch64_sve_whilelo:
310  case Intrinsic::aarch64_sve_whilels:
311  case Intrinsic::aarch64_sve_whilelt:
312  case Intrinsic::aarch64_sve_match:
313  case Intrinsic::aarch64_sve_nmatch:
314  case Intrinsic::aarch64_sve_whilege_x2:
315  case Intrinsic::aarch64_sve_whilegt_x2:
316  case Intrinsic::aarch64_sve_whilehi_x2:
317  case Intrinsic::aarch64_sve_whilehs_x2:
318  case Intrinsic::aarch64_sve_whilele_x2:
319  case Intrinsic::aarch64_sve_whilelo_x2:
320  case Intrinsic::aarch64_sve_whilels_x2:
321  case Intrinsic::aarch64_sve_whilelt_x2:
322  return true;
323  }
324  }
325 }
326 
328  const AArch64Subtarget &STI)
329  : TargetLowering(TM), Subtarget(&STI) {
330  // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
331  // we have to make something up. Arbitrarily, choose ZeroOrOne.
333  // When comparing vectors the result sets the different elements in the
334  // vector to all-one or all-zero.
336 
337  // Set up the register classes.
338  addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
339  addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
340 
341  if (Subtarget->hasLS64()) {
342  addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
345  }
346 
347  if (Subtarget->hasFPARMv8()) {
348  addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
349  addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
350  addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
351  addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
352  addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
353  }
354 
355  if (Subtarget->hasNEON()) {
356  addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
357  addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
358  // Someone set us up the NEON.
359  addDRTypeForNEON(MVT::v2f32);
360  addDRTypeForNEON(MVT::v8i8);
361  addDRTypeForNEON(MVT::v4i16);
362  addDRTypeForNEON(MVT::v2i32);
363  addDRTypeForNEON(MVT::v1i64);
364  addDRTypeForNEON(MVT::v1f64);
365  addDRTypeForNEON(MVT::v4f16);
366  if (Subtarget->hasBF16())
367  addDRTypeForNEON(MVT::v4bf16);
368 
369  addQRTypeForNEON(MVT::v4f32);
370  addQRTypeForNEON(MVT::v2f64);
371  addQRTypeForNEON(MVT::v16i8);
372  addQRTypeForNEON(MVT::v8i16);
373  addQRTypeForNEON(MVT::v4i32);
374  addQRTypeForNEON(MVT::v2i64);
375  addQRTypeForNEON(MVT::v8f16);
376  if (Subtarget->hasBF16())
377  addQRTypeForNEON(MVT::v8bf16);
378  }
379 
380  if (Subtarget->hasSVEorSME()) {
381  // Add legal sve predicate types
382  addRegisterClass(MVT::nxv1i1, &AArch64::PPRRegClass);
383  addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
384  addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
385  addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
386  addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
387 
388  // Add legal sve data types
389  addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
390  addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
391  addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
392  addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
393 
394  addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
395  addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
396  addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
397  addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
398  addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
399  addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
400 
401  if (Subtarget->hasBF16()) {
402  addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
403  addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
404  addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
405  }
406 
407  if (Subtarget->useSVEForFixedLengthVectors()) {
410  addRegisterClass(VT, &AArch64::ZPRRegClass);
411 
414  addRegisterClass(VT, &AArch64::ZPRRegClass);
415  }
416  }
417 
418  // Compute derived properties from the register classes
420 
421  // Provide all sorts of operation actions
458 
462 
466 
468 
469  // Custom lowering hooks are needed for XOR
470  // to fold it into CSINC/CSINV.
473 
474  // Virtually no operation on f128 is legal, but LLVM can't expand them when
475  // there's a valid register class, so we need custom operations in most cases.
499  // FIXME: f128 FMINIMUM and FMAXIMUM (including STRICT versions) currently
500  // aren't handled.
501 
502  // Lowering for many of the conversions is actually specified by the non-f128
503  // type. The LowerXXX function will be trivial when f128 isn't involved.
534 
539 
540  // Variable arguments.
545 
546  // Variable-sized objects.
549 
550  if (Subtarget->isTargetWindows())
552  else
554 
555  // Constant pool entries
557 
558  // BlockAddress
560 
561  // AArch64 lacks both left-rotate and popcount instructions.
564  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
567  }
568 
569  // AArch64 doesn't have i32 MULH{S|U}.
572 
573  // AArch64 doesn't have {U|S}MUL_LOHI.
576 
577  if (Subtarget->hasCSSC()) {
581 
583 
587 
590 
595 
600  } else {
604 
607 
610  }
611 
614  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
617  }
624 
625  // Custom lower Add/Sub/Mul with overflow.
638 
647 
656  if (Subtarget->hasFullFP16())
658  else
660 
661  for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
671  }
672 
673  if (!Subtarget->hasFullFP16()) {
674  for (auto Op :
690 
 691  // Round-to-integer operations need custom lowering for fp16, as Promote
 692  // doesn't work because the result type is integer.
696 
697  // promote v4f16 to v4f32 when that is known to be safe.
702 
719 
741  }
742 
743  // AArch64 has implementations of a lot of rounding-like FP operations.
744  for (auto Op :
755  for (MVT Ty : {MVT::f32, MVT::f64})
757  if (Subtarget->hasFullFP16())
759  }
760 
761  // Basic strict FP operations are legal
764  for (MVT Ty : {MVT::f32, MVT::f64})
766  if (Subtarget->hasFullFP16())
768  }
769 
770  // Strict conversion to a larger type is legal
771  for (auto VT : {MVT::f32, MVT::f64})
773 
775 
778 
784 
785  // Generate outline atomics library calls only if LSE was not specified for
786  // subtarget
787  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
813 #define LCALLNAMES(A, B, N) \
814  setLibcallName(A##N##_RELAX, #B #N "_relax"); \
815  setLibcallName(A##N##_ACQ, #B #N "_acq"); \
816  setLibcallName(A##N##_REL, #B #N "_rel"); \
817  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
818 #define LCALLNAME4(A, B) \
819  LCALLNAMES(A, B, 1) \
820  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
821 #define LCALLNAME5(A, B) \
822  LCALLNAMES(A, B, 1) \
823  LCALLNAMES(A, B, 2) \
824  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
825  LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
826  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
827  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
828  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
829  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
830  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
831 #undef LCALLNAMES
832 #undef LCALLNAME4
833 #undef LCALLNAME5
834  }
835 
836  if (Subtarget->hasLSE128()) {
837  // Custom lowering because i128 is not legal. Must be replaced by 2x64
838  // values. ATOMIC_LOAD_AND also needs op legalisation to emit LDCLRP.
842  }
843 
844  // 128-bit loads and stores can be done without expanding
847 
848  // Aligned 128-bit loads and stores are single-copy atomic according to the
849  // v8.4a spec. LRCPC3 introduces 128-bit STILP/LDIAPP but still requires LSE2.
850  if (Subtarget->hasLSE2()) {
853  }
854 
855  // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
856  // custom lowering, as there are no un-paired non-temporal stores and
857  // legalization will break up 256 bit inputs.
865 
866  // 256 bit non-temporal loads can be lowered to LDNP. This is done using
867  // custom lowering, as there are no un-paired non-temporal loads and
868  // legalization will break up 256 bit inputs.
876 
877  // Lower READCYCLECOUNTER using an mrs from CNTVCT_EL0.
879 
880  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
881  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
882  // Issue __sincos_stret if available.
885  } else {
888  }
889 
890  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
891  // MSVCRT doesn't have powi; fall back to pow
892  setLibcallName(RTLIB::POWI_F32, nullptr);
893  setLibcallName(RTLIB::POWI_F64, nullptr);
894  }
895 
896  // Make floating-point constants legal for the large code model, so they don't
897  // become loads from the constant pool.
898  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
901  }
902 
903  // AArch64 does not have floating-point extending loads, i1 sign-extending
904  // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
905  for (MVT VT : MVT::fp_valuetypes()) {
910  }
911  for (MVT VT : MVT::integer_valuetypes())
913 
921 
925 
926  // Indexed loads and stores are supported.
927  for (unsigned im = (unsigned)ISD::PRE_INC;
945  }
946 
947  // Trap.
951 
952  // We combine OR nodes for bitfield operations.
954  // Try to create BICs for vector ANDs.
956 
957  // Vector add and sub nodes may conceal a high-half opportunity.
958  // Also, try to fold ADD into CSINC/CSINV..
960  ISD::UINT_TO_FP});
961 
964 
965  // Try and combine setcc with csel
967 
969 
976 
978 
980 
982 
986 
988 
990 
992 
994 
995  // In case of strict alignment, avoid an excessive number of byte wide stores.
998  Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
999 
1003  Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
1004 
1006  MaxStoresPerMemmove = 4;
1007 
1010  Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
1011 
1013 
1015 
1016  EnableExtLdPromotion = true;
1017 
1018  // Set required alignment.
1020  // Set preferred alignments.
1024 
1025  // Only change the limit for entries in a jump table if specified by
1026  // the sub target, but not at the command line.
1027  unsigned MaxJT = STI.getMaximumJumpTableSize();
1028  if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
1029  setMaximumJumpTableSize(MaxJT);
1030 
1031  setHasExtractBitsInsn(true);
1032 
1034 
1036 
1037  if (Subtarget->hasNEON()) {
1038  // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
1039  // silliness like this:
1040  for (auto Op :
1056 
1057  for (auto Op :
1063 
1064  // AArch64 doesn't have a direct vector ->f32 conversion instructions for
1065  // elements smaller than i32, so promote the input to i32 first.
1068 
1069  // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1070  // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
1071  // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
1074  for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
1076 
1077  if (Subtarget->hasFullFP16()) {
1079 
1088  } else {
1089  // when AArch64 doesn't have fullfp16 support, promote the input
1090  // to i32 first.
1099  }
1100 
1109  for (auto VT : {MVT::v1i64, MVT::v2i64}) {
1114  }
1115 
1116  // AArch64 doesn't have MUL.2d:
1118  // Custom handling for some quad-vector types to detect MULL.
1122 
1123  // Saturates
1124  for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1130  }
1131 
1133  MVT::v4i32}) {
1140  }
1141 
1142  // Vector reductions
1143  for (MVT VT : { MVT::v4f16, MVT::v2f32,
1145  if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1148 
1150  }
1151  }
1152  for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1159  }
1161 
1164  // Likewise, narrowing and extending vector loads/stores aren't handled
1165  // directly.
1166  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1168 
1169  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1172  } else {
1175  }
1178 
1181 
1182  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1183  setTruncStoreAction(VT, InnerVT, Expand);
1184  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1185  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1186  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1187  }
1188  }
1189 
1190  // AArch64 has implementations of a lot of rounding-like FP operations.
1191  for (auto Op :
1196  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
1197  setOperationAction(Op, Ty, Legal);
1198  if (Subtarget->hasFullFP16())
1199  for (MVT Ty : {MVT::v4f16, MVT::v8f16})
1200  setOperationAction(Op, Ty, Legal);
1201  }
1202 
1204 
1211 
1212  // ADDP custom lowering
1213  for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1215  // FADDP custom lowering
1216  for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1218  }
1219 
1220  if (Subtarget->hasSME()) {
1222  }
1223 
1224  // FIXME: Move lowering for more nodes here if those are common between
1225  // SVE and SME.
1226  if (Subtarget->hasSVEorSME()) {
1227  for (auto VT :
1231  }
1232  }
1233 
1234  if (Subtarget->hasSVE()) {
1235  for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1276 
1282 
1291 
1292  if (Subtarget->hasSVE2()) {
1297  }
1298  }
1299 
1300  // Illegal unpacked integer vector types.
1301  for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1304  }
1305 
1306  // Legalize unpacked bitcasts to REINTERPRET_CAST.
1310 
1311  for (auto VT :
1315 
1316  for (auto VT :
1325 
1329 
1330  // There are no legal MVT::nxv16f## based types.
1331  if (VT != MVT::nxv16i1) {
1334  }
1335  }
1336 
1337  // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
1345  }
1346 
1347  // Firstly, exclude all scalable vector extending loads/truncating stores,
1348  // include both integer and floating scalable vector.
1349  for (MVT VT : MVT::scalable_vector_valuetypes()) {
1350  for (MVT InnerVT : MVT::scalable_vector_valuetypes()) {
1351  setTruncStoreAction(VT, InnerVT, Expand);
1352  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1353  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1354  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1355  }
1356  }
1357 
1358  // Then, selectively enable those which we directly support.
1365  for (auto Op : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
1372  }
1373 
1374  // SVE supports truncating stores of 64 and 128-bit vectors
1380 
1417 
1430 
1442  }
1443 
1444  for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1451  }
1452 
1455 
1456  // NEON doesn't support integer divides, but SVE does
1457  for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1461  }
1462 
1463  // NEON doesn't support 64-bit vector integer muls, but SVE does.
1466 
1467  // NEON doesn't support across-vector reductions, but SVE does.
1468  for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
1470 
1471  if (Subtarget->forceStreamingCompatibleSVE()) {
1483  addTypeForStreamingSVE(VT);
1484 
1485  for (MVT VT :
1487  addTypeForStreamingSVE(VT);
1488  }
1489 
1490  // NOTE: Currently this has to happen after computeRegisterProperties rather
1491  // than the preferred option of combining it with the addRegisterClass call.
1492  if (Subtarget->useSVEForFixedLengthVectors()) {
1495  addTypeForFixedLengthSVE(VT);
1498  addTypeForFixedLengthSVE(VT);
1499 
1500  // 64bit results can mean a bigger than NEON input.
1501  for (auto VT : {MVT::v8i8, MVT::v4i16})
1504 
1505  // 128bit results imply a bigger than NEON input.
1506  for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1508  for (auto VT : {MVT::v8f16, MVT::v4f32})
1510 
1511  // These operations are not supported on NEON but SVE can do them.
1532 
1533  // Int operations with no NEON support.
1534  for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1541  }
1542 
1543 
1544  // Use SVE for vectors with more than 2 elements.
1545  for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1547  }
1548 
1553 
1555  }
1556 
1557  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
1558  // Only required for llvm.aarch64.mops.memset.tag
1560  }
1561 
1563 
1564  PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1565 
1566  IsStrictFPEnabled = true;
1567 }
1568 
1569 void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1570  assert(VT.isVector() && "VT should be a vector type");
1571 
1572  if (VT.isFloatingPoint()) {
1574  setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1575  setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1576  }
1577 
1578  // Mark vector float intrinsics as expand.
1579  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1588  }
1589 
1590  // But we do support custom-lowering for FCOPYSIGN.
1591  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
1592  ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
1594 
1607 
1611  for (MVT InnerVT : MVT::all_valuetypes())
1612  setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1613 
1614  // CNT supports only B element sizes, then use UADDLP to widen.
1615  if (VT != MVT::v8i8 && VT != MVT::v16i8)
1617 
1623 
1624  for (unsigned Opcode :
1627  setOperationAction(Opcode, VT, Custom);
1628 
1629  if (!VT.isFloatingPoint())
1631 
1632  // [SU][MIN|MAX] are available for all NEON types apart from i64.
1633  if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1634  for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1635  setOperationAction(Opcode, VT, Legal);
1636 
1637  // F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP
1638  // NEON types.
1639  if (VT.isFloatingPoint() &&
1640  VT.getVectorElementType() != MVT::bf16 &&
1641  (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1642  for (unsigned Opcode :
1648  setOperationAction(Opcode, VT, Legal);
1649 
1650  // Strict fp extend and trunc are legal
1651  if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 16)
1653  if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64)
1655 
1656  // FIXME: We could potentially make use of the vector comparison instructions
1657  // for STRICT_FSETCC and STRICT_FSETCSS, but there's a number of
1658  // complications:
1659  // * FCMPEQ/NE are quiet comparisons, the rest are signalling comparisons,
1660  // so we would need to expand when the condition code doesn't match the
1661  // kind of comparison.
1662  // * Some kinds of comparison require more than one FCMXY instruction so
1663  // would need to be expanded instead.
1664  // * The lowering of the non-strict versions involves target-specific ISD
1665  // nodes so we would likely need to add strict versions of all of them and
1666  // handle them appropriately.
1669 
1670  if (Subtarget->isLittleEndian()) {
1671  for (unsigned im = (unsigned)ISD::PRE_INC;
1675  }
1676  }
1677 
1678  if (Subtarget->hasD128()) {
1681  }
1682 }
1683 
1685  EVT OpVT) const {
1686  // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1687  if (!Subtarget->hasSVE())
1688  return true;
1689 
1690  // We can only support legal predicate result types. We can use the SVE
1691  // whilelo instruction for generating fixed-width predicates too.
1692  if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
1693  ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
1694  ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
1695  return true;
1696 
1697  // The whilelo instruction only works with i32 or i64 scalar inputs.
1698  if (OpVT != MVT::i32 && OpVT != MVT::i64)
1699  return true;
1700 
1701  return false;
1702 }
1703 
1704 void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
1705  // By default set all operations to Expand,
1706  // then change to Legal/Custom if needed.
1707  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1709 
1710  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1711 
1712  if (VT.isFloatingPoint()) {
1722  }
1723 
1724  // STORE, LOAD, SCALAR_TO_VECTOR and BITCAST are natively supported,
1725  // so no need to Custom/Expand them.
1730 
1731  // Mark integer truncating stores/extending loads as having custom lowering
1732  if (VT.isInteger()) {
1733  MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1734  while (InnerVT != VT) {
1735  setTruncStoreAction(VT, InnerVT, Custom);
1736  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
1737  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
1738  InnerVT = InnerVT.changeVectorElementType(
1739  MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1740  }
1741  }
1742 
1743  // Mark floating-point truncating stores/extending loads as having custom
1744  // lowering
1745  if (VT.isFloatingPoint()) {
1746  MVT InnerVT = VT.changeVectorElementType(MVT::f16);
1747  while (InnerVT != VT) {
1748  setTruncStoreAction(VT, InnerVT, Custom);
1749  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
1750  InnerVT = InnerVT.changeVectorElementType(
1752  }
1753  }
1754 
1834 }
1835 
1836 void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1837  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1838 
1839  // By default everything must be expanded.
1840  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1842 
1843  if (VT.isFloatingPoint()) {
1853  }
1854 
1855  // Mark integer truncating stores/extending loads as having custom lowering
1856  if (VT.isInteger()) {
1857  MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1858  while (InnerVT != VT) {
1859  setTruncStoreAction(VT, InnerVT, Custom);
1860  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
1861  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
1862  InnerVT = InnerVT.changeVectorElementType(
1863  MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1864  }
1865  }
1866 
1867  // Mark floating-point truncating stores/extending loads as having custom
1868  // lowering
1869  if (VT.isFloatingPoint()) {
1870  MVT InnerVT = VT.changeVectorElementType(MVT::f16);
1871  while (InnerVT != VT) {
1872  setTruncStoreAction(VT, InnerVT, Custom);
1873  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
1874  InnerVT = InnerVT.changeVectorElementType(
1876  }
1877  }
1878 
1879  // Lower fixed length vector operations to scalable equivalents.
1964 }
1965 
// Register VT with the 64-bit NEON (D / FPR64) register class, then configure
// the common NEON operation actions for it via addTypeForNEON. The register
// class must be added first so the type is legal when actions are set.
1966 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1967  addRegisterClass(VT, &AArch64::FPR64RegClass);
1968  addTypeForNEON(VT);
1969 }
1970 
// Register VT with the 128-bit NEON (Q / FPR128) register class, then
// configure the common NEON operation actions for it via addTypeForNEON.
// The register class must be added first so the type is legal when actions
// are set.
1971 void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1972  addRegisterClass(VT, &AArch64::FPR128RegClass);
1973  addTypeForNEON(VT);
1974 }
1975 
1977  LLVMContext &C, EVT VT) const {
1978  if (!VT.isVector())
1979  return MVT::i32;
1980  if (VT.isScalableVector())
1983 }
1984 
1985 // isIntImmediate - This method tests to see if the node is a constant
1986 // operand. If so Imm will receive the value.
1987 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
1988  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
1989  Imm = C->getZExtValue();
1990  return true;
1991  }
1992  return false;
1993 }
1994 
1995 // isOpcWithIntImmediate - This method tests to see if the node is a specific
1996 // opcode and that it has a immediate integer right operand.
1997 // If so Imm will receive the value.
1998 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
1999  uint64_t &Imm) {
2000  return N->getOpcode() == Opc &&
2001  isIntImmediate(N->getOperand(1).getNode(), Imm);
2002 }
2003 
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
                               const APInt &Demanded,
                               unsigned NewOpc) {
  uint64_t OldImm = Imm, NewImm, Enc;
  uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;

  // Return if the immediate is already all zeros, all ones, a bimm32 or a
  // bimm64.
  if (Imm == 0 || Imm == Mask ||
    return false;

  unsigned EltSize = Size;
  uint64_t DemandedBits = Demanded.getZExtValue();

  // Clear bits that are not demanded.
  Imm &= DemandedBits;

  // Search for a replicated bit pattern that is a valid logical immediate:
  // choose values for the non-demanded bits, and halve the candidate element
  // size on each failed attempt.
  while (true) {
    // The goal here is to set the non-demanded bits in a way that minimizes
    // the number of switching between 0 and 1. In order to achieve this goal,
    // we set the non-demanded bits to the value of the preceding demanded bits.
    // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
    // non-demanded bit), we copy bit0 (1) to the least significant 'x',
    // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
    // The final result is 0b11000011.
    uint64_t NonDemandedBits = ~DemandedBits;
    uint64_t InvertedImm = ~Imm & DemandedBits;
    uint64_t RotatedImm =
        ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
        NonDemandedBits;
    uint64_t Sum = RotatedImm + NonDemandedBits;
    bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
    uint64_t Ones = (Sum + Carry) & NonDemandedBits;
    NewImm = (Imm | Ones) & Mask;

    // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
    // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
    // we halve the element size and continue the search.
    if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
      break;

    // We cannot shrink the element size any further if it is 2-bits.
    if (EltSize == 2)
      return false;

    EltSize /= 2;
    Mask >>= EltSize;
    uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;

    // Return if there is mismatch in any of the demanded bits of Imm and Hi.
    if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
      return false;

    // Merge the upper and lower halves of Imm and DemandedBits.
    Imm |= Hi;
    DemandedBits |= DemandedBitsHi;
  }

  ++NumOptimizedImms;

  // Replicate the element across the register width.
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;
    EltSize *= 2;
  }

  // OldImm is only read inside the asserts below; silence -Wunused in
  // release (NDEBUG) builds.
  (void)OldImm;
  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
         "demanded bits should never be altered");
  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");

  // Create the new constant immediate node.
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue New;

  // If the new constant immediate is all-zeros or all-ones, let the target
  // independent DAG combine optimize this node.
  if (NewImm == 0 || NewImm == OrigMask) {
    New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
                          TLO.DAG.getConstant(NewImm, DL, VT));
    // Otherwise, create a machine node so that target independent DAG combine
    // doesn't undo this optimization.
  } else {
    Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
    SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
    New = SDValue(
        TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
  }

  return TLO.CombineTo(Op, New);
}
2098 
2100  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2101  TargetLoweringOpt &TLO) const {
2102  // Delay this optimization to as late as possible.
2103  if (!TLO.LegalOps)
2104  return false;
2105 
2107  return false;
2108 
2109  EVT VT = Op.getValueType();
2110  if (VT.isVector())
2111  return false;
2112 
2113  unsigned Size = VT.getSizeInBits();
2114  assert((Size == 32 || Size == 64) &&
2115  "i32 or i64 is expected after legalization.");
2116 
2117  // Exit early if we demand all bits.
2118  if (DemandedBits.countPopulation() == Size)
2119  return false;
2120 
2121  unsigned NewOpc;
2122  switch (Op.getOpcode()) {
2123  default:
2124  return false;
2125  case ISD::AND:
2126  NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
2127  break;
2128  case ISD::OR:
2129  NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
2130  break;
2131  case ISD::XOR:
2132  NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
2133  break;
2134  }
2135  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2136  if (!C)
2137  return false;
2138  uint64_t Imm = C->getZExtValue();
2139  return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
2140 }
2141 
2142 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
2143 /// Mask are known to be either zero or one and return them Known.
2145  const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
2146  const SelectionDAG &DAG, unsigned Depth) const {
2147  switch (Op.getOpcode()) {
2148  default:
2149  break;
2150  case AArch64ISD::DUP: {
2151  SDValue SrcOp = Op.getOperand(0);
2152  Known = DAG.computeKnownBits(SrcOp, Depth + 1);
2153  if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
2154  assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
2155  "Expected DUP implicit truncation");
2156  Known = Known.trunc(Op.getScalarValueSizeInBits());
2157  }
2158  break;
2159  }
2160  case AArch64ISD::CSEL: {
2161  KnownBits Known2;
2162  Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2163  Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2164  Known = KnownBits::commonBits(Known, Known2);
2165  break;
2166  }
2167  case AArch64ISD::BICi: {
2168  // Compute the bit cleared value.
2169  uint64_t Mask =
2170  ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
2171  Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2172  Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), Mask));
2173  break;
2174  }
2175  case AArch64ISD::VLSHR: {
2176  KnownBits Known2;
2177  Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2178  Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2179  Known = KnownBits::lshr(Known, Known2);
2180  break;
2181  }
2182  case AArch64ISD::VASHR: {
2183  KnownBits Known2;
2184  Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2185  Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
2186  Known = KnownBits::ashr(Known, Known2);
2187  break;
2188  }
2189  case AArch64ISD::MOVI: {
2190  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(0));
2191  Known =
2193  break;
2194  }
2195  case AArch64ISD::LOADgot:
2196  case AArch64ISD::ADDlow: {
2197  if (!Subtarget->isTargetILP32())
2198  break;
2199  // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
2200  Known.Zero = APInt::getHighBitsSet(64, 32);
2201  break;
2202  }
2204  Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
2205  Known.Zero |= APInt(Known.getBitWidth(), 0xFE);
2206  break;
2207  }
2208  case ISD::INTRINSIC_W_CHAIN: {
2209  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
2210  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
2211  switch (IntID) {
2212  default: return;
2213  case Intrinsic::aarch64_ldaxr:
2214  case Intrinsic::aarch64_ldxr: {
2215  unsigned BitWidth = Known.getBitWidth();
2216  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
2217  unsigned MemBits = VT.getScalarSizeInBits();
2218  Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
2219  return;
2220  }
2221  }
2222  break;
2223  }
2225  case ISD::INTRINSIC_VOID: {
2226  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2227  switch (IntNo) {
2228  default:
2229  break;
2230  case Intrinsic::aarch64_neon_umaxv:
2231  case Intrinsic::aarch64_neon_uminv: {
2232  // Figure out the datatype of the vector operand. The UMINV instruction
2233  // will zero extend the result, so we can mark as known zero all the
2234  // bits larger than the element datatype. 32-bit or larget doesn't need
2235  // this as those are legal types and will be handled by isel directly.
2236  MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
2237  unsigned BitWidth = Known.getBitWidth();
2238  if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2239  assert(BitWidth >= 8 && "Unexpected width!");
2241  Known.Zero |= Mask;
2242  } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
2243  assert(BitWidth >= 16 && "Unexpected width!");
2245  Known.Zero |= Mask;
2246  }
2247  break;
2248  } break;
2249  }
2250  }
2251  }
2252 }
2253 
2255  EVT) const {
2256  return MVT::i64;
2257 }
2258 
2260  EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
2261  unsigned *Fast) const {
2262  if (Subtarget->requiresStrictAlign())
2263  return false;
2264 
2265  if (Fast) {
2266  // Some CPUs are fine with unaligned stores except for 128-bit ones.
2267  *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
2268  // See comments in performSTORECombine() for more details about
2269  // these conditions.
2270 
2271  // Code that uses clang vector extensions can mark that it
2272  // wants unaligned accesses to be treated as fast by
2273  // underspecifying alignment to be 1 or 2.
2274  Alignment <= 2 ||
2275 
2276  // Disregard v2i64. Memcpy lowering produces those and splitting
2277  // them regresses performance on micro-benchmarks and olden/bh.
2278  VT == MVT::v2i64;
2279  }
2280  return true;
2281 }
2282 
2283 // Same as above but handling LLTs instead.
2285  LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
2286  unsigned *Fast) const {
2287  if (Subtarget->requiresStrictAlign())
2288  return false;
2289 
2290  if (Fast) {
2291  // Some CPUs are fine with unaligned stores except for 128-bit ones.
2292  *Fast = !Subtarget->isMisaligned128StoreSlow() ||
2293  Ty.getSizeInBytes() != 16 ||
2294  // See comments in performSTORECombine() for more details about
2295  // these conditions.
2296 
2297  // Code that uses clang vector extensions can mark that it
2298  // wants unaligned accesses to be treated as fast by
2299  // underspecifying alignment to be 1 or 2.
2300  Alignment <= 2 ||
2301 
2302  // Disregard v2i64. Memcpy lowering produces those and splitting
2303  // them regresses performance on micro-benchmarks and olden/bh.
2304  Ty == LLT::fixed_vector(2, 64);
2305  }
2306  return true;
2307 }
2308 
2309 FastISel *
2311  const TargetLibraryInfo *libInfo) const {
2312  return AArch64::createFastISel(funcInfo, libInfo);
2313 }
2314 
2315 const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
2316 #define MAKE_CASE(V) \
2317  case V: \
2318  return #V;
2319  switch ((AArch64ISD::NodeType)Opcode) {
2321  break;
2624  }
2625 #undef MAKE_CASE
2626  return nullptr;
2627 }
2628 
2631  MachineBasicBlock *MBB) const {
2632  // We materialise the F128CSEL pseudo-instruction as some control flow and a
2633  // phi node:
2634 
2635  // OrigBB:
2636  // [... previous instrs leading to comparison ...]
2637  // b.ne TrueBB
2638  // b EndBB
2639  // TrueBB:
2640  // ; Fallthrough
2641  // EndBB:
2642  // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2643 
2644  MachineFunction *MF = MBB->getParent();
2645  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2646  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2647  DebugLoc DL = MI.getDebugLoc();
2649 
2650  Register DestReg = MI.getOperand(0).getReg();
2651  Register IfTrueReg = MI.getOperand(1).getReg();
2652  Register IfFalseReg = MI.getOperand(2).getReg();
2653  unsigned CondCode = MI.getOperand(3).getImm();
2654  bool NZCVKilled = MI.getOperand(4).isKill();
2655 
2656  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2657  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2658  MF->insert(It, TrueBB);
2659  MF->insert(It, EndBB);
2660 
2661  // Transfer rest of current basic-block to EndBB
2662  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2663  MBB->end());
2665 
2666  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2667  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2668  MBB->addSuccessor(TrueBB);
2669  MBB->addSuccessor(EndBB);
2670 
2671  // TrueBB falls through to the end.
2672  TrueBB->addSuccessor(EndBB);
2673 
2674  if (!NZCVKilled) {
2675  TrueBB->addLiveIn(AArch64::NZCV);
2676  EndBB->addLiveIn(AArch64::NZCV);
2677  }
2678 
2679  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2680  .addReg(IfTrueReg)
2681  .addMBB(TrueBB)
2682  .addReg(IfFalseReg)
2683  .addMBB(MBB);
2684 
2685  MI.eraseFromParent();
2686  return EndBB;
2687 }
2688 
2690  MachineInstr &MI, MachineBasicBlock *BB) const {
2692  BB->getParent()->getFunction().getPersonalityFn())) &&
2693  "SEH does not use catchret!");
2694  return BB;
2695 }
2696 
AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                    MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  // Replace the pseudo with the real SME tile-load instruction Opc.
  MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));

  // Operand 0 of the pseudo is the tile number; adding it to BaseReg yields
  // the concrete ZA tile register the load defines.
  MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
  MIB.add(MI.getOperand(1)); // slice index register
  MIB.add(MI.getOperand(2)); // slice index offset
  MIB.add(MI.getOperand(3)); // pg
  MIB.add(MI.getOperand(4)); // base
  MIB.add(MI.getOperand(5)); // offset

  MI.eraseFromParent(); // The pseudo is gone now.
  return BB;
}
2714 
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  // Expand the LDR_ZA pseudo into the real ZA fill (load) instruction.
  MachineInstrBuilder MIB =
      BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));

  MIB.addReg(AArch64::ZA, RegState::Define);
  MIB.add(MI.getOperand(0)); // Vector select register
  MIB.add(MI.getOperand(1)); // Vector select offset
  MIB.add(MI.getOperand(2)); // Base
  // Operand 1 is deliberately added twice: LDR_ZA takes the same immediate
  // as both the vector-select offset and the load offset.
  MIB.add(MI.getOperand(1)); // Offset, same as vector select offset

  MI.eraseFromParent(); // The pseudo is gone now.
  return BB;
}
2730 
AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                   MachineInstr &MI,
                                   MachineBasicBlock *BB, bool HasTile) const {
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  // Replace the SME pseudo with the real instruction Opc.
  MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
  unsigned StartIdx = 0;

  if (HasTile) {
    // Tile form: operand 0 of the pseudo is the tile number. The concrete
    // tile register (BaseReg + tile number) is added both as the def and as
    // a use, then skipped when copying the remaining operands.
    MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
    MIB.addReg(BaseReg + MI.getOperand(0).getImm());
    StartIdx = 1;
  } else
    // Array form: the instruction defs and uses the whole ZA base register.
    MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg);

  // Forward the rest of the pseudo's operands unchanged.
  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent(); // The pseudo is gone now.
  return BB;
}
2752 
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  // Expand the ZERO_M pseudo into the real SME ZERO instruction.
  MachineInstrBuilder MIB =
      BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
  MIB.add(MI.getOperand(0)); // Mask

  // Each bit of the mask selects one of the eight 64-bit ZA tiles
  // (ZAD0..ZAD7); mark every selected tile as implicitly defined.
  unsigned Mask = MI.getOperand(0).getImm();
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))
      MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine);
  }

  MI.eraseFromParent(); // The pseudo is gone now.
  return BB;
}
2769 
    MachineInstr &MI, MachineBasicBlock *BB) const {

  // SME pseudos are described by a pseudo map; the matrix-type TSFlags of the
  // pseudo select which ZA base register family the instruction addresses.
  int SMEOrigInstr = AArch64::getSMEPseudoMap(MI.getOpcode());
  if (SMEOrigInstr != -1) {
    const TargetInstrInfo *TII = Subtarget->getInstrInfo();
        TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
    switch (SMEMatrixType) {
    case (AArch64::SMEMatrixArray):
      return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false);
    case (AArch64::SMEMatrixTileB):
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, /*HasTile*/ true);
    case (AArch64::SMEMatrixTileH):
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, /*HasTile*/ true);
    case (AArch64::SMEMatrixTileS):
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, /*HasTile*/ true);
    case (AArch64::SMEMatrixTileD):
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, /*HasTile*/ true);
    case (AArch64::SMEMatrixTileQ):
      return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, /*HasTile*/ true);
    }
  }

  switch (MI.getOpcode()) {
  default:
#ifndef NDEBUG
    MI.dump();
#endif
    llvm_unreachable("Unexpected instruction for custom inserter!");

  case AArch64::F128CSEL:
    return EmitF128CSEL(MI, BB);
  case TargetOpcode::STATEPOINT:
    // STATEPOINT is a pseudo instruction which has no implicit defs/uses
    // while bl call instruction (where statepoint will be lowered at the end)
    // has implicit def. This def is early-clobber as it will be set at
    // the moment of the call and earlier than any use is read.
    // Add this implicit dead def here as a workaround.
    MI.addOperand(*MI.getMF(),
                      AArch64::LR, /*isDef*/ true,
                      /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
                      /*isUndef*/ false, /*isEarlyClobber*/ true));
    [[fallthrough]];
    return emitPatchPoint(MI, BB);

  case AArch64::CATCHRET:
    return EmitLoweredCatchRet(MI, BB);
  // SME tile loads: horizontal (_H_) and vertical (_V_) slice forms, one
  // pseudo per element size (B/H/S/D/Q).
  case AArch64::LD1_MXIPXX_H_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LDR_ZA_PSEUDO:
    return EmitFill(MI, BB);
  case AArch64::ZERO_M_PSEUDO:
    return EmitZero(MI, BB);
  }
}
2847 
2848 //===----------------------------------------------------------------------===//
2849 // AArch64 Lowering private implementation.
2850 //===----------------------------------------------------------------------===//
2851 
2852 //===----------------------------------------------------------------------===//
2853 // Lowering Code
2854 //===----------------------------------------------------------------------===//
2855 
2856 // Forward declarations of SVE fixed length lowering helpers
2861  SelectionDAG &DAG);
2863  EVT VT);
2864 
/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {
  // Look through a bit convert.
  while (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0).getNode();

    return true;

  if (N->getOpcode() != AArch64ISD::DUP)
    return false;

  // A DUP of a zero scalar (integer or floating-point) is a zero vector.
  auto Opnd0 = N->getOperand(0);
  return isNullConstant(Opnd0) || isNullFPConstant(Opnd0);
}
2880 
2881 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2882 /// CC
2884  switch (CC) {
2885  default:
2886  llvm_unreachable("Unknown condition code!");
2887  case ISD::SETNE:
2888  return AArch64CC::NE;
2889  case ISD::SETEQ:
2890  return AArch64CC::EQ;
2891  case ISD::SETGT:
2892  return AArch64CC::GT;
2893  case ISD::SETGE:
2894  return AArch64CC::GE;
2895  case ISD::SETLT:
2896  return AArch64CC::LT;
2897  case ISD::SETLE:
2898  return AArch64CC::LE;
2899  case ISD::SETUGT:
2900  return AArch64CC::HI;
2901  case ISD::SETUGE:
2902  return AArch64CC::HS;
2903  case ISD::SETULT:
2904  return AArch64CC::LO;
2905  case ISD::SETULE:
2906  return AArch64CC::LS;
2907  }
2908 }
2909 
2910 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2913  AArch64CC::CondCode &CondCode2) {
2914  CondCode2 = AArch64CC::AL;
2915  switch (CC) {
2916  default:
2917  llvm_unreachable("Unknown FP condition!");
2918  case ISD::SETEQ:
2919  case ISD::SETOEQ:
2921  break;
2922  case ISD::SETGT:
2923  case ISD::SETOGT:
2925  break;
2926  case ISD::SETGE:
2927  case ISD::SETOGE:
2929  break;
2930  case ISD::SETOLT:
2932  break;
2933  case ISD::SETOLE:
2935  break;
2936  case ISD::SETONE:
2938  CondCode2 = AArch64CC::GT;
2939  break;
2940  case ISD::SETO:
2942  break;
2943  case ISD::SETUO:
2945  break;
2946  case ISD::SETUEQ:
2948  CondCode2 = AArch64CC::VS;
2949  break;
2950  case ISD::SETUGT:
2952  break;
2953  case ISD::SETUGE:
2955  break;
2956  case ISD::SETLT:
2957  case ISD::SETULT:
2959  break;
2960  case ISD::SETLE:
2961  case ISD::SETULE:
2963  break;
2964  case ISD::SETNE:
2965  case ISD::SETUNE:
2967  break;
2968  }
2969 }
2970 
2971 /// Convert a DAG fp condition code to an AArch64 CC.
2972 /// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2973 /// should be AND'ed instead of OR'ed.
2976  AArch64CC::CondCode &CondCode2) {
2977  CondCode2 = AArch64CC::AL;
2978  switch (CC) {
2979  default:
2980  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2981  assert(CondCode2 == AArch64CC::AL);
2982  break;
2983  case ISD::SETONE:
2984  // (a one b)
2985  // == ((a olt b) || (a ogt b))
2986  // == ((a ord b) && (a une b))
2988  CondCode2 = AArch64CC::NE;
2989  break;
2990  case ISD::SETUEQ:
2991  // (a ueq b)
2992  // == ((a uno b) || (a oeq b))
2993  // == ((a ule b) && (a uge b))
2995  CondCode2 = AArch64CC::LE;
2996  break;
2997  }
2998 }
2999 
3000 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
3001 /// CC usable with the vector instructions. Fewer operations are available
3002 /// without a real NZCV register, so we have to use less efficient combinations
3003 /// to get the same effect.
3006  AArch64CC::CondCode &CondCode2,
3007  bool &Invert) {
3008  Invert = false;
3009  switch (CC) {
3010  default:
3011  // Mostly the scalar mappings work fine.
3012  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
3013  break;
3014  case ISD::SETUO:
3015  Invert = true;
3016  [[fallthrough]];
3017  case ISD::SETO:
3019  CondCode2 = AArch64CC::GE;
3020  break;
3021  case ISD::SETUEQ:
3022  case ISD::SETULT:
3023  case ISD::SETULE:
3024  case ISD::SETUGT:
3025  case ISD::SETUGE:
3026  // All of the compare-mask comparisons are ordered, but we can switch
3027  // between the two by a double inversion. E.g. ULE == !OGT.
3028  Invert = true;
3029  changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
3030  CondCode, CondCode2);
3031  break;
3032  }
3033 }
3034 
3036  // Matches AArch64DAGToDAGISel::SelectArithImmed().
3037  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
3038  LLVM_DEBUG(dbgs() << "Is imm " << C
3039  << " legal: " << (IsLegal ? "yes\n" : "no\n"));
3040  return IsLegal;
3041 }
3042 
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
3044 // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
3045 // can be set differently by this operation. It comes down to whether
3046 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
3047 // everything is fine. If not then the optimization is wrong. Thus general
3048 // comparisons are only valid if op2 != 0.
3049 //
3050 // So, finally, the only LLVM-native comparisons that don't mention C and V
3051 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
3052 // the absence of information about op2.
3054  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
3055  (CC == ISD::SETEQ || CC == ISD::SETNE);
3056 }
3057 
3059  SelectionDAG &DAG, SDValue Chain,
3060  bool IsSignaling) {
3061  EVT VT = LHS.getValueType();
3062  assert(VT != MVT::f128);
3063 
3064  const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
3065 
3066  if (VT == MVT::f16 && !FullFP16) {
3068  {Chain, LHS});
3070  {LHS.getValue(1), RHS});
3071  Chain = RHS.getValue(1);
3072  VT = MVT::f32;
3073  }
3074  unsigned Opcode =
3076  return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
3077 }
3078 
                              const SDLoc &dl, SelectionDAG &DAG) {
  EVT VT = LHS.getValueType();
  const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();

  if (VT.isFloatingPoint()) {
    assert(VT != MVT::f128);
    if (VT == MVT::f16 && !FullFP16) {
      // Without full FP16 support, widen both operands and compare as f32.
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
      VT = MVT::f32;
    }
    return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
  }

  // The CMP instruction is just an alias for SUBS, and representing it as
  // SUBS means that it's possible to get CSE with subtract operations.
  // A later phase can perform the optimization of setting the destination
  // register to WZR/XZR if it ends up being unused.
  unsigned Opcode = AArch64ISD::SUBS;

  if (isCMN(RHS, CC)) {
    // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
    Opcode = AArch64ISD::ADDS;
    RHS = RHS.getOperand(1);
  } else if (isCMN(LHS, CC)) {
    // As we are looking for EQ/NE compares, the operands can be commuted ; can
    // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
    Opcode = AArch64ISD::ADDS;
    LHS = LHS.getOperand(1);
  } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
    if (LHS.getOpcode() == ISD::AND) {
      // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
      // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
      // of the signed comparisons.
      const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
                                           DAG.getVTList(VT, MVT_CC),
                                           LHS.getOperand(0),
                                           LHS.getOperand(1));
      // Replace all users of (and X, Y) with newly generated (ands X, Y)
      DAG.ReplaceAllUsesWith(LHS, ANDSNode);
      return ANDSNode.getValue(1);
    } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
      // Use result of ANDS
      return LHS.getValue(1);
    }
  }

  // The NZCV flags are produced as the second result of the SUBS/ADDS node.
  return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
      .getValue(1);
}
3130 
3131 /// \defgroup AArch64CCMP CMP;CCMP matching
3132 ///
3133 /// These functions deal with the formation of CMP;CCMP;... sequences.
3134 /// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
3135 /// a comparison. They set the NZCV flags to a predefined value if their
3136 /// predicate is false. This allows to express arbitrary conjunctions, for
3137 /// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
3138 /// expressed as:
3139 /// cmp A
3140 /// ccmp B, inv(CB), CA
3141 /// check for CB flags
3142 ///
3143 /// This naturally lets us implement chains of AND operations with SETCC
3144 /// operands. And we can even implement some other situations by transforming
3145 /// them:
3146 /// - We can implement (NEG SETCC) i.e. negating a single comparison by
3147 /// negating the flags used in a CCMP/FCCMP operations.
3148 /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
3149 /// by negating the flags we test for afterwards. i.e.
3150 /// NEG (CMP CCMP CCCMP ...) can be implemented.
3151 /// - Note that we can only ever negate all previously processed results.
3152 /// What we can not implement by flipping the flags to test is a negation
3153 /// of two sub-trees (because the negation affects all sub-trees emitted so
3154 /// far, so the 2nd sub-tree we emit would also affect the first).
3155 /// With those tools we can implement some OR operations:
3156 /// - (OR (SETCC A) (SETCC B)) can be implemented via:
3157 /// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
3158 /// - After transforming OR to NEG/AND combinations we may be able to use NEG
3159 /// elimination rules from earlier to implement the whole thing as a
3160 /// CCMP/FCCMP chain.
3161 ///
3162 /// As complete example:
3163 /// or (or (setCA (cmp A)) (setCB (cmp B)))
3164 /// (and (setCC (cmp C)) (setCD (cmp D)))"
3165 /// can be reassociated to:
3166 /// or (and (setCC (cmp C)) setCD (cmp D))
/// (or (setCA (cmp A)) (setCB (cmp B)))
3168 /// can be transformed to:
3169 /// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
3170 /// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
3171 /// which can be implemented as:
3172 /// cmp C
3173 /// ccmp D, inv(CD), CC
3174 /// ccmp A, CA, inv(CD)
3175 /// ccmp B, CB, inv(CA)
3176 /// check for CB flags
3177 ///
3178 /// A counterexample is "or (and A B) (and C D)" which translates to
3179 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
3180 /// can only implement 1 of the inner (not) operations, but not both!
3181 /// @{
3182 
3183 /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
3185  ISD::CondCode CC, SDValue CCOp,
3187  AArch64CC::CondCode OutCC,
3188  const SDLoc &DL, SelectionDAG &DAG) {
3189  unsigned Opcode = 0;
3190  const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
3191 
3192  if (LHS.getValueType().isFloatingPoint()) {
3193  assert(LHS.getValueType() != MVT::f128);
3194  if (LHS.getValueType() == MVT::f16 && !FullFP16) {
3197  }
3198  Opcode = AArch64ISD::FCCMP;
3199  } else if (RHS.getOpcode() == ISD::SUB) {
3200  SDValue SubOp0 = RHS.getOperand(0);
3201  if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3202  // See emitComparison() on why we can only do this for SETEQ and SETNE.
3203  Opcode = AArch64ISD::CCMN;
3204  RHS = RHS.getOperand(1);
3205  }
3206  }
3207  if (Opcode == 0)
3208  Opcode = AArch64ISD::CCMP;
3209 
3210  SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
3212  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
3213  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
3214  return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
3215 }
3216 
3217 /// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
3218 /// expressed as a conjunction. See \ref AArch64CCMP.
3219 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
3220 /// changing the conditions on the SETCC tests.
3221 /// (this means we can call emitConjunctionRec() with
3222 /// Negate==true on this sub-tree)
3223 /// \param MustBeFirst Set to true if this subtree needs to be negated and we
3224 /// cannot do the negation naturally. We are required to
3225 /// emit the subtree first in this case.
3226 /// \param WillNegate Is true if are called when the result of this
3227 /// subexpression must be negated. This happens when the
3228 /// outer expression is an OR. We can use this fact to know
3229 /// that we have a double negation (or (or ...) ...) that
3230 /// can be implemented for free.
3231 static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
3232  bool &MustBeFirst, bool WillNegate,
3233  unsigned Depth = 0) {
3234  if (!Val.hasOneUse())
3235  return false;
3236  unsigned Opcode = Val->getOpcode();
3237  if (Opcode == ISD::SETCC) {
3238  if (Val->getOperand(0).getValueType() == MVT::f128)
3239  return false;
3240  CanNegate = true;
3241  MustBeFirst = false;
3242  return true;
3243  }
3244  // Protect against exponential runtime and stack overflow.
3245  if (Depth > 6)
3246  return false;
3247  if (Opcode == ISD::AND || Opcode == ISD::OR) {
3248  bool IsOR = Opcode == ISD::OR;
3249  SDValue O0 = Val->getOperand(0);
3250  SDValue O1 = Val->getOperand(1);
3251  bool CanNegateL;
3252  bool MustBeFirstL;
3253  if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
3254  return false;
3255  bool CanNegateR;
3256  bool MustBeFirstR;
3257  if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
3258  return false;
3259 
3260  if (MustBeFirstL && MustBeFirstR)
3261  return false;
3262 
3263  if (IsOR) {
3264  // For an OR expression we need to be able to naturally negate at least
3265  // one side or we cannot do the transformation at all.
3266  if (!CanNegateL && !CanNegateR)
3267  return false;
3268  // If we the result of the OR will be negated and we can naturally negate
3269  // the leafs, then this sub-tree as a whole negates naturally.
3270  CanNegate = WillNegate && CanNegateL && CanNegateR;
3271  // If we cannot naturally negate the whole sub-tree, then this must be
3272  // emitted first.
3273  MustBeFirst = !CanNegate;
3274  } else {
3275  assert(Opcode == ISD::AND && "Must be OR or AND");
3276  // We cannot naturally negate an AND operation.
3277  CanNegate = false;
3278  MustBeFirst = MustBeFirstL || MustBeFirstR;
3279  }
3280  return true;
3281  }
3282  return false;
3283 }
3284 
3285 /// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
3286 /// of CCMP/CFCMP ops. See @ref AArch64CCMP.
3287 /// Tries to transform the given i1 producing node @p Val to a series compare
3288 /// and conditional compare operations. @returns an NZCV flags producing node
3289 /// and sets @p OutCC to the flags that should be tested or returns SDValue() if
3290 /// transformation was not possible.
3291 /// \p Negate is true if we want this sub-tree being negated just by changing
3292 /// SETCC conditions.
                                  AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
  // We're at a tree leaf, produce a conditional comparison operation.
  unsigned Opcode = Val->getOpcode();
  if (Opcode == ISD::SETCC) {
    SDValue LHS = Val->getOperand(0);
    SDValue RHS = Val->getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
    bool isInteger = LHS.getValueType().isInteger();
    // Negation of a leaf is free: just invert the SETCC condition.
    if (Negate)
      CC = getSetCCInverse(CC, LHS.getValueType());
    SDLoc DL(Val);
    // Determine OutCC and handle FP special case.
    if (isInteger) {
      OutCC = changeIntCCToAArch64CC(CC);
    } else {
      assert(LHS.getValueType().isFloatingPoint());
      AArch64CC::CondCode ExtraCC;
      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
      // Some floating point conditions can't be tested with a single condition
      // code. Construct an additional comparison in this case.
      if (ExtraCC != AArch64CC::AL) {
        SDValue ExtraCmp;
        if (!CCOp.getNode())
          ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
        else
          ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
                                               ExtraCC, DL, DAG);
        // The extra comparison becomes the incoming flags for the final one.
        CCOp = ExtraCmp;
        Predicate = ExtraCC;
      }
    }

    // Produce a normal comparison if we are first in the chain
    if (!CCOp)
      return emitComparison(LHS, RHS, CC, DL, DAG);
    // Otherwise produce a ccmp.
    return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
                                     DAG);
  }
  // Interior node: canEmitConjunction() must have validated this tree, so
  // Val is a single-use AND/OR here.
  assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");

  bool IsOR = Opcode == ISD::OR;

  SDValue LHS = Val->getOperand(0);
  bool CanNegateL;
  bool MustBeFirstL;
  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
  assert(ValidL && "Valid conjunction/disjunction tree");
  (void)ValidL;

  SDValue RHS = Val->getOperand(1);
  bool CanNegateR;
  bool MustBeFirstR;
  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
  assert(ValidR && "Valid conjunction/disjunction tree");
  (void)ValidR;

  // Swap sub-tree that must come first to the right side.
  if (MustBeFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
    std::swap(LHS, RHS);
    std::swap(CanNegateL, CanNegateR);
    std::swap(MustBeFirstL, MustBeFirstR);
  }

  // Decide which sub-trees are emitted negated (NegateL/NegateR) and which
  // results get their condition code inverted afterwards.
  bool NegateR;
  bool NegateAfterR;
  bool NegateL;
  bool NegateAfterAll;
  if (Opcode == ISD::OR) {
    // Swap the sub-tree that we can negate naturally to the left.
    if (!CanNegateL) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      assert(!Negate);
      std::swap(LHS, RHS);
      NegateR = false;
      NegateAfterR = true;
    } else {
      // Negate the left sub-tree if possible, otherwise negate the result.
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    }
    NegateL = true;
    NegateAfterAll = !Negate;
  } else {
    // AND needs no negation at all.
    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");

    NegateL = false;
    NegateR = false;
    NegateAfterR = false;
    NegateAfterAll = false;
  }

  // Emit sub-trees: the right side first (it produces the incoming flags for
  // the left side's conditional compare).
  AArch64CC::CondCode RHSCC;
  SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
  if (NegateAfterR)
    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
  SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
  if (NegateAfterAll)
    OutCC = AArch64CC::getInvertedCondCode(OutCC);
  return CmpL;
}
3400 
3401 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
3402 /// In some cases this is even possible with OR operations in the expression.
3403 /// See \ref AArch64CCMP.
3404 /// \see emitConjunctionRec().
                               AArch64CC::CondCode &OutCC) {
  bool DummyCanNegate;
  bool DummyMustBeFirst;
  // Only attempt emission when the whole expression is provably expressible
  // as a conjunction; emitConjunctionRec() asserts on invalid trees.
  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
    return SDValue();

  // Start the chain with no incoming flags (empty CCOp) and the always-true
  // predicate.
  return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
}
3414 
3415 /// @}
3416 
3417 /// Returns how profitable it is to fold a comparison's operand's shift and/or
3418 /// extension operations.
  // Returns true if V is an extension a compare can fold: an explicit
  // sign_extend_inreg, or an AND with a byte/halfword/word mask (which acts
  // as a zero extension).
  auto isSupportedExtend = [&](SDValue V) {
    if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
      return true;

    if (V.getOpcode() == ISD::AND)
      if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = MaskCst->getZExtValue();
        return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
      }

    return false;
  };

  // Folding is only possible when the operand has no other users.
  if (!Op.hasOneUse())
    return 0;

  if (isSupportedExtend(Op))
    return 1;

  unsigned Opc = Op.getOpcode();
  if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
    if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      uint64_t Shift = ShiftCst->getZExtValue();
      // An extend combined with a small shift scores highest.
      if (isSupportedExtend(Op.getOperand(0)))
        return (Shift <= 4) ? 2 : 1;
      EVT VT = Op.getValueType();
      // A plain shift with an in-range amount can also be folded.
      if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
        return 1;
    }

  return 0;
}
3452 
                             SDValue &AArch64cc, SelectionDAG &DAG,
                             const SDLoc &dl) {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    EVT VT = RHS.getValueType();
    uint64_t C = RHSC->getZExtValue();
    if (!isLegalArithImmed(C)) {
      // Constant does not fit, try adjusting it by one?
      // Each case nudges C by one and compensates by relaxing/tightening the
      // condition code, guarding against wrap-around at the type's extremes.
      switch (CC) {
      default:
        break;
      case ISD::SETLT:
      case ISD::SETGE:
        if ((VT == MVT::i32 && C != 0x80000000 &&
             isLegalArithImmed((uint32_t)(C - 1))) ||
            (VT == MVT::i64 && C != 0x80000000ULL &&
             isLegalArithImmed(C - 1ULL))) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        if ((VT == MVT::i32 && C != 0 &&
             isLegalArithImmed((uint32_t)(C - 1))) ||
            (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        if ((VT == MVT::i32 && C != INT32_MAX &&
             isLegalArithImmed((uint32_t)(C + 1))) ||
            (VT == MVT::i64 && C != INT64_MAX &&
             isLegalArithImmed(C + 1ULL))) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        if ((VT == MVT::i32 && C != UINT32_MAX &&
             isLegalArithImmed((uint32_t)(C + 1))) ||
            (VT == MVT::i64 && C != UINT64_MAX &&
             isLegalArithImmed(C + 1ULL))) {
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      }
    }
  }

  // Comparisons are canonicalized so that the RHS operand is simpler than the
  // LHS one, the extreme case being when RHS is an immediate. However, AArch64
  // can fold some shift+extend operations on the RHS operand, so swap the
  // operands if that can be done.
  //
  // For example:
  //    lsl     w13, w11, #1
  //    cmp     w13, w12
  // can be turned into:
  //    cmp     w12, w11, lsl #1
  if (!isa<ConstantSDNode>(RHS) ||
      !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
    SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;

      std::swap(LHS, RHS);
    }
  }

  SDValue Cmp;
  AArch64CC::CondCode AArch64CC;
  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
    const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);

    // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
    // For the i8 operand, the largest immediate is 255, so this can be easily
    // encoded in the compare instruction. For the i16 operand, however, the
    // largest immediate cannot be encoded in the compare.
    // Therefore, use a sign extending load and cmn to avoid materializing the
    // -1 constant. For example,
    // movz w1, #65535
    // ldrh w0, [x0, #0]
    // cmp w0, w1
    // >
    // ldrsh w0, [x0, #0]
    // cmn w0, #1
    // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
    // if and only if (sext LHS) == (sext RHS). The checks are in place to
    // ensure both the LHS and RHS are truly zero extended and to make sure the
    // transformation is profitable.
    if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
        cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
        cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
        LHS.getNode()->hasNUsesOfValue(1, 0)) {
      int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
        SDValue SExt =
            DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
                        DAG.getValueType(MVT::i16));
        Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
                                                   RHS.getValueType()),
                             CC, dl, DAG);
        AArch64CC = changeIntCCToAArch64CC(CC);
      }
    }

    // Try to express (LHS ==/!= 0/1) as a chain of conditional compares.
    if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
      if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
        if ((CC == ISD::SETNE) ^ RHSC->isZero())
          AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
      }
    }
  }

  // Fall back to a plain comparison.
  if (!Cmp) {
    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
    AArch64CC = changeIntCCToAArch64CC(CC);
  }
  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
  return Cmp;
}
3583 
// Lowers an [SU]ADDO/[SU]SUBO/[SU]MULO node to an AArch64 flag-setting
// operation. Returns the {result value, overflow flag} pair and sets CC to
// the condition that signals overflow.
static std::pair<SDValue, SDValue>
  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
         "Unsupported value type");
  SDValue Value, Overflow;
  SDLoc DL(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  unsigned Opc = 0;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::SADDO:
    Opc = AArch64ISD::ADDS;
    CC = AArch64CC::VS;
    break;
  case ISD::UADDO:
    Opc = AArch64ISD::ADDS;
    CC = AArch64CC::HS;
    break;
  case ISD::SSUBO:
    Opc = AArch64ISD::SUBS;
    CC = AArch64CC::VS;
    break;
  case ISD::USUBO:
    Opc = AArch64ISD::SUBS;
    CC = AArch64CC::LO;
    break;
  // Multiply needs a little bit extra work.
  case ISD::SMULO:
  case ISD::UMULO: {
    CC = AArch64CC::NE;
    bool IsSigned = Op.getOpcode() == ISD::SMULO;
    if (Op.getValueType() == MVT::i32) {
      // Extend to 64-bits, then perform a 64-bit multiply.
      unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
      RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);

      // Check that the result fits into a 32-bit integer.
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
      if (IsSigned) {
        // cmp xreg, wreg, sxtw
        SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
        Overflow =
            DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
      } else {
        // tst xreg, #0xffffffff00000000
        SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
        Overflow =
            DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
      }
      break;
    }
    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
    // For the 64 bit multiply
    Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
    if (IsSigned) {
      // Overflow iff the high 64 bits differ from the sign-extension of the
      // low 64 bits.
      SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
      SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
                                      DAG.getConstant(63, DL, MVT::i64));
      // It is important that LowerBits is last, otherwise the arithmetic
      // shift will not be folded into the compare (SUBS).
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
      Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
                     .getValue(1);
    } else {
      // Unsigned overflow iff any of the high 64 bits are non-zero.
      SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
      Overflow =
          DAG.getNode(AArch64ISD::SUBS, DL, VTs,
                      DAG.getConstant(0, DL, MVT::i64),
                      UpperBits).getValue(1);
    }
    break;
  }
  } // switch (...)

  if (Opc) {
    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);

    // Emit the AArch64 operation with overflow check.
    Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
    Overflow = Value.getValue(1);
  }
  return std::make_pair(Value, Overflow);
}
3673 
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
  // Fixed-length vectors that should use SVE go through the scalable path.
  if (useSVEForFixedLengthVectorVT(Op.getValueType(),
                                   Subtarget->forceStreamingCompatibleSVE()))
    return LowerToScalableOp(Op, DAG);

  SDValue Sel = Op.getOperand(0);
  SDValue Other = Op.getOperand(1);
  SDLoc dl(Sel);

  // If the operand is an overflow checking operation, invert the condition
  // code and kill the Not operation. I.e., transform:
  // (xor (overflow_op_bool, 1))
  // -->
  // (csel 1, 0, invert(cc), overflow_op_bool)
  // ... which later gets transformed to just a cset instruction with an
  // inverted condition code, rather than a cset + eor sequence.
  if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
      return SDValue();

    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
    SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
    return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
                       CCVal, Overflow);
  }
  // If neither operand is a SELECT_CC, give up.
  if (Sel.getOpcode() != ISD::SELECT_CC)
    std::swap(Sel, Other);
  if (Sel.getOpcode() != ISD::SELECT_CC)
    return Op;

  // The folding we want to perform is:
  // (xor x, (select_cc a, b, cc, 0, -1) )
  // -->
  // (csel x, (xor x, -1), cc ...)
  //
  // The latter will get matched to a CSINV instruction.

  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
  SDValue LHS = Sel.getOperand(0);
  SDValue RHS = Sel.getOperand(1);
  SDValue TVal = Sel.getOperand(2);
  SDValue FVal = Sel.getOperand(3);

  // FIXME: This could be generalized to non-integer comparisons.
  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
    return Op;

  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);

  // The values aren't constants, this isn't the pattern we're looking for.
  if (!CFVal || !CTVal)
    return Op;

  // We can commute the SELECT_CC by inverting the condition. This
  // might be needed to make this fit into a CSINV pattern.
  if (CTVal->isAllOnes() && CFVal->isZero()) {
    std::swap(TVal, FVal);
    std::swap(CTVal, CFVal);
    CC = ISD::getSetCCInverse(CC, LHS.getValueType());
  }

  // If the constants line up, perform the transform!
  if (CTVal->isZero() && CFVal->isAllOnes()) {
    SDValue CCVal;
    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);

    FVal = Other;
    TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
                       DAG.getConstant(-1ULL, dl, Other.getValueType()));

    return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
                       CCVal, Cmp);
  }

  return Op;
}
3757 
3758 // If Invert is false, sets 'C' bit of NZCV to 0 if value is 0, else sets 'C'
3759 // bit to 1. If Invert is true, sets 'C' bit of NZCV to 1 if value is 0, else
3760 // sets 'C' bit to 0.
3761 static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
3762  SDLoc DL(Value);
3763  EVT VT = Value.getValueType();
3764  SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value;
3765  SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT);
3766  SDValue Cmp =
3767  DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1);
3768  return Cmp.getValue(1);
3769 }
3770 
3771 // If Invert is false, value is 1 if 'C' bit of NZCV is 1, else 0.
3772 // If Invert is true, value is 0 if 'C' bit of NZCV is 1, else 1.
                                bool Invert) {
  // Flag must be the glue/flags result of the producing node.
  assert(Flag.getResNo() == 1);
  SDLoc DL(Flag);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  // Select on HS (C set) normally, or LO (C clear) when inverted.
  unsigned Cond = Invert ? AArch64CC::LO : AArch64CC::HS;
  SDValue CC = DAG.getConstant(Cond, DL, MVT::i32);
  return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
}
3783 
3784 // Value is 1 if 'V' bit of NZCV is 1, else 0
  // Flag must be the glue/flags result of the producing node.
  assert(Flag.getResNo() == 1);
  SDLoc DL(Flag);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  // Select 1 when the overflow condition holds, 0 otherwise.
  return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
}
3793 
3794 // This lowering is inefficient, but it will get cleaned up by
3795 // `foldOverflowCheck`
3796 static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode,
3797  bool IsSigned) {
3798  EVT VT0 = Op.getValue(0).getValueType();
3799  EVT VT1 = Op.getValue(1).getValueType();
3800 
3801  if (VT0 != MVT::i32 && VT0 != MVT::i64)
3802  return SDValue();
3803 
3804  bool InvertCarry = Opcode == AArch64ISD::SBCS;
3805  SDValue OpLHS = Op.getOperand(0);
3806  SDValue OpRHS = Op.getOperand(1);
3807  SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry);
3808 
3809  SDLoc DL(Op);
3810  SDVTList VTs = DAG.getVTList(VT0, VT1);
3811 
3812  SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS,
3813  OpRHS, OpCarryIn);
3814 
3815  SDValue OutFlag =
3816  IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG)
3817  : carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry);
3818 
3819  return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag);
3820 }
3821 
  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
    return SDValue();

  SDLoc dl(Op);
  // The actual operation that sets the overflow or carry flag.
  SDValue Value, Overflow;
  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);

  // We use 0 and 1 as false and true values.
  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);

  // We use an inverted condition, because the conditional select is inverted
  // too. This will allow it to be selected to a single instruction:
  // CSINC Wd, WZR, WZR, invert(cond).
  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
  Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
                         CCVal, Overflow);

  // Merge the arithmetic result with the materialized overflow bit.
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
3847 
3848 // Prefetch operands are:
3849 // 1: Address to prefetch
3850 // 2: bool isWrite
3851 // 3: int locality (0 = no locality ... 3 = extreme locality)
3852 // 4: bool isDataCache
  SDLoc DL(Op);
  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();

  // Locality 0 maps to the streaming (non-temporal) prefetch variant.
  bool IsStream = !Locality;
  // When the locality number is set
  if (Locality) {
    // The front-end should have filtered out the out-of-range values
    assert(Locality <= 3 && "Prefetch locality out-of-range");
    // The locality degree is the opposite of the cache speed.
    // Put the number the other way around.
    // The encoding starts at 0 for level 1
    Locality = 3 - Locality;
  }

  // Build the mask value encoding the expected behavior.
  unsigned PrfOp = (IsWrite << 4) |     // Load/Store bit
                   (!IsData << 3) |     // IsDataCache bit
                   (Locality << 1) |    // Cache level bits
                   (unsigned)IsStream;  // Stream bit
  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
                     DAG.getTargetConstant(PrfOp, DL, MVT::i32),
                     Op.getOperand(1));
}
3879 
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  // Scalable vectors use the predicated SVE lowering.
  if (VT.isScalableVector())
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);

                                   Subtarget->forceStreamingCompatibleSVE()))
    return LowerFixedLengthFPExtendToSVE(Op, DAG);

  // Only f128 extensions reach this point; they get no custom lowering.
  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
  return SDValue();
}
3893 
3894 SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3895  SelectionDAG &DAG) const {
3896  if (Op.getValueType().isScalableVector())
3897  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3898 
3899  bool IsStrict = Op->isStrictFPOpcode();
3900  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3901  EVT SrcVT = SrcVal.getValueType();
3902 
3903  if (useSVEForFixedLengthVectorVT(SrcVT,
3904  Subtarget->forceStreamingCompatibleSVE()))
3905  return LowerFixedLengthFPRoundToSVE(Op, DAG);
3906 
3907  if (SrcVT != MVT::f128) {
3908  // Expand cases where the input is a vector bigger than NEON.
3909  if (useSVEForFixedLengthVectorVT(SrcVT))
3910  return SDValue();
3911 
3912  // It's legal except when f128 is involved
3913  return Op;
3914  }
3915 
3916  return SDValue();
3917 }
3918 
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
  // Any additional optimization in this function should be recorded
  // in the cost tables.
  bool IsStrict = Op->isStrictFPOpcode();
  EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
  EVT VT = Op.getValueType();

  // Scalable vectors go through the predicated SVE conversion nodes.
  if (VT.isScalableVector()) {
    unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
    return LowerToPredicatedOp(Op, DAG, Opcode);
  }

                                   Subtarget->forceStreamingCompatibleSVE()) ||
                                   Subtarget->forceStreamingCompatibleSVE()))
    return LowerFixedLengthFPToIntToSVE(Op, DAG);

  unsigned NumElts = InVT.getVectorNumElements();

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (InVT.getVectorElementType() == MVT::f16 &&
      !Subtarget->hasFullFP16()) {
    MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
    SDLoc dl(Op);
    if (IsStrict) {
      SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NewVT, MVT::Other},
                                {Op.getOperand(0), Op.getOperand(1)});
      return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});
    }
    return DAG.getNode(
        Op.getOpcode(), dl, Op.getValueType(),
        DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
  }

  uint64_t VTSize = VT.getFixedSizeInBits();
  uint64_t InVTSize = InVT.getFixedSizeInBits();
  // Narrowing conversion: convert to same-width integers, then truncate.
  if (VTSize < InVTSize) {
    SDLoc dl(Op);
    if (IsStrict) {
      InVT = InVT.changeVectorElementTypeToInteger();
      SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other},
                               {Op.getOperand(0), Op.getOperand(1)});
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
      return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl);
    }
    SDValue Cv =
        DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
                    Op.getOperand(0));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
  }

  // Widening conversion: extend the FP input first, then convert.
  if (VTSize > InVTSize) {
    SDLoc dl(Op);
    MVT ExtVT =
                      VT.getVectorNumElements());
    if (IsStrict) {
      SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {ExtVT, MVT::Other},
                                {Op.getOperand(0), Op.getOperand(1)});
      return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});
    }
    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
    return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
  }

  // Use a scalar operation for conversions between single-element vectors of
  // the same size.
  if (NumElts == 1) {
    SDLoc dl(Op);
    SDValue Extract = DAG.getNode(
        Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, dl, MVT::i64));
    EVT ScalarVT = VT.getScalarType();
    if (IsStrict)
      return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
                         {Op.getOperand(0), Extract});
    return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
  }

  // Type changing conversions are illegal.
  return Op;
}
4008 
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
                                              SelectionDAG &DAG) const {
  // Strict FP nodes carry the chain in operand 0; the value follows it.
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  // Vector sources are handled by the vector-specific lowering.
  if (SrcVal.getValueType().isVector())
    return LowerVectorFP_TO_INT(Op, DAG);

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
    SDLoc dl(Op);
    if (IsStrict) {
      SDValue Ext =
          {Op.getOperand(0), SrcVal});
      return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});
    }
    return DAG.getNode(
        Op.getOpcode(), dl, Op.getValueType(),
        DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
  }

  if (SrcVal.getValueType() != MVT::f128) {
    // It's legal except when f128 is involved
    return Op;
  }

  // f128 sources get no custom lowering.
  return SDValue();
}
4039 
SDValue
AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
                                                SelectionDAG &DAG) const {
  // AArch64 FP-to-int conversions saturate to the destination element size, so
  // we can lower common saturating conversions to simple instructions.
  SDValue SrcVal = Op.getOperand(0);
  EVT SrcVT = SrcVal.getValueType();
  EVT DstVT = Op.getValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
  uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
  uint64_t SatWidth = SatVT.getScalarSizeInBits();
  assert(SatWidth <= DstElementWidth &&
         "Saturation width cannot exceed result width");

  // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
  // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
  // types, so this is hard to reach.
  if (DstVT.isScalableVector())
    return SDValue();

  EVT SrcElementVT = SrcVT.getVectorElementType();

  // In the absence of FP16 support, promote f16 to f32 and saturate the result.
  if (SrcElementVT == MVT::f16 &&
      (!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
    SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
    SrcVT = F32VT;
    SrcElementVT = MVT::f32;
    SrcElementWidth = 32;
  } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
             SrcElementVT != MVT::f16)
    return SDValue();

  SDLoc DL(Op);
  // Cases that we can emit directly.
  if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
    return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
                       DAG.getValueType(DstVT.getScalarType()));

  // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
  // result. This is only valid if the legal cvt is larger than the saturate
  // width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
  // (at least until sqxtn is selected).
  if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
    return SDValue();

  EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
  SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
                                  DAG.getValueType(IntVT.getScalarType()));
  SDValue Sat;
  if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
    // Signed saturation: clamp into [signed min, signed max] of SatWidth.
    SDValue MinC = DAG.getConstant(
        APInt::getSignedMaxValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
    SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
    SDValue MaxC = DAG.getConstant(
        APInt::getSignedMinValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
    Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
  } else {
    // Unsigned saturation: clamp to the all-ones value of SatWidth.
    SDValue MinC = DAG.getConstant(
        APInt::getAllOnesValue(SatWidth).zext(SrcElementWidth), DL, IntVT);
    Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
  }

  return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
4108 
4109 SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
4110  SelectionDAG &DAG) const {
4111  // AArch64 FP-to-int conversions saturate to the destination register size, so
4112  // we can lower common saturating conversions to simple instructions.
4113  SDValue SrcVal = Op.getOperand(0);
4114  EVT SrcVT = SrcVal.getValueType();
4115 
4116  if (SrcVT.isVector())
4117  return LowerVectorFP_TO_INT_SAT(Op, DAG);
4118 
4119  EVT DstVT = Op.getValueType();
4120  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4121  uint64_t SatWidth = SatVT.getScalarSizeInBits();
4122  uint64_t DstWidth = DstVT.getScalarSizeInBits();
4123  assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
4124 
4125  // In the absence of FP16 support, promote f16 to f32 and saturate the result.
4126  if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) {
4127  SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);
4128  SrcVT = MVT::f32;
4129  } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16)
4130  return SDValue();
4131 
4132  SDLoc DL(Op);
4133  // Cases that we can emit directly.
4134  if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
4135  (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
4136  DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
4137  return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
4138  DAG.getValueType(DstVT));
4139 
4140  // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
4141  // result. This is only valid if the legal cvt is larger than the saturate
4142  // width.
4143  if (DstWidth < SatWidth)
4144  return SDValue();
4145 
4146  SDValue NativeCvt =
4147  DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
4148  SDValue Sat;
4149  if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
4150  SDValue MinC = DAG.getConstant(
4151  APInt::getSignedMaxValue(SatWidth).sext(DstWidth), DL, DstVT);
4152  SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC);
4153  SDValue MaxC = DAG.getConstant(
4154  APInt::getSignedMinValue(SatWidth).sext(DstWidth), DL, DstVT);
4155  Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
4156  } else {
4157  SDValue MinC = DAG.getConstant(
4158  APInt::getAllOnesValue(SatWidth).zext(DstWidth), DL, DstVT);
4159  Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
4160  }
4161 
4162  return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
4163 }
4164 
4165 SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
4166  SelectionDAG &DAG) const {
4167  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
4168  // Any additional optimization in this function should be recorded
4169  // in the cost tables.
4170  bool IsStrict = Op->isStrictFPOpcode();
4171  EVT VT = Op.getValueType();
4172  SDLoc dl(Op);
4173  SDValue In = Op.getOperand(IsStrict ? 1 : 0);
4174  EVT InVT = In.getValueType();
4175  unsigned Opc = Op.getOpcode();
4176  bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
4177 
4178  if (VT.isScalableVector()) {
4179  if (InVT.getVectorElementType() == MVT::i1) {
4180  // We can't directly extend an SVE predicate; extend it first.
4181  unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4182  EVT CastVT = getPromotedVTForPredicate(InVT);
4183  In = DAG.getNode(CastOpc, dl, CastVT, In);
4184  return DAG.getNode(Opc, dl, VT, In);
4185  }
4186 
4187  unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
4189  return LowerToPredicatedOp(Op, DAG, Opcode);
4190  }
4191 
4193  Subtarget->forceStreamingCompatibleSVE()) ||
4195  Subtarget->forceStreamingCompatibleSVE()))
4196  return LowerFixedLengthIntToFPToSVE(Op, DAG);
4197 
4198  uint64_t VTSize = VT.getFixedSizeInBits();
4199  uint64_t InVTSize = InVT.getFixedSizeInBits();
4200  if (VTSize < InVTSize) {
4201  MVT CastVT =
4203  InVT.getVectorNumElements());
4204  if (IsStrict) {
4205  In = DAG.getNode(Opc, dl, {CastVT, MVT::Other},
4206  {Op.getOperand(0), In});
4207  return DAG.getNode(
4208  ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
4209  {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)});
4210  }
4211  In = DAG.getNode(Opc, dl, CastVT, In);
4212  return DAG.getNode(ISD::FP_ROUND, dl, VT, In,
4213  DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
4214  }
4215 
4216  if (VTSize > InVTSize) {
4217  unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4218  EVT CastVT = VT.changeVectorElementTypeToInteger();
4219  In = DAG.getNode(CastOpc, dl, CastVT, In);
4220  if (IsStrict)
4221  return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op.getOperand(0), In});
4222  return DAG.getNode(Opc, dl, VT, In);
4223  }
4224 
4225  // Use a scalar operation for conversions between single-element vectors of
4226  // the same size.
4227  if (VT.getVectorNumElements() == 1) {
4228  SDValue Extract = DAG.getNode(
4230  In, DAG.getConstant(0, dl, MVT::i64));
4231  EVT ScalarVT = VT.getScalarType();
4232  if (IsStrict)
4233  return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
4234  {Op.getOperand(0), Extract});
4235  return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
4236  }
4237 
4238  return Op;
4239 }
4240 
4241 SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
4242  SelectionDAG &DAG) const {
4243  if (Op.getValueType().isVector())
4244  return LowerVectorINT_TO_FP(Op, DAG);
4245 
4246  bool IsStrict = Op->isStrictFPOpcode();
4247  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
4248 
4249  // f16 conversions are promoted to f32 when full fp16 is not supported.
4250  if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4251  SDLoc dl(Op);
4252  if (IsStrict) {
4253  SDValue Val = DAG.getNode(Op.getOpcode(), dl, {MVT::f32, MVT::Other},
4254  {Op.getOperand(0), SrcVal});
4255  return DAG.getNode(
4257  {Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)});
4258  }
4259  return DAG.getNode(
4260  ISD::FP_ROUND, dl, MVT::f16,
4261  DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
4262  DAG.getIntPtrConstant(0, dl));
4263  }
4264 
4265  // i128 conversions are libcalls.
4266  if (SrcVal.getValueType() == MVT::i128)
4267  return SDValue();
4268 
4269  // Other conversions are legal, unless it's to the completely software-based
4270  // fp128.
4271  if (Op.getValueType() != MVT::f128)
4272  return Op;
4273  return SDValue();
4274 }
4275 
4276 SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
4277  SelectionDAG &DAG) const {
4278  // For iOS, we want to call an alternative entry point: __sincos_stret,
4279  // which returns the values in two S / D registers.
4280  SDLoc dl(Op);
4281  SDValue Arg = Op.getOperand(0);
4282  EVT ArgVT = Arg.getValueType();
4283  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
4284 
4285  ArgListTy Args;
4286  ArgListEntry Entry;
4287 
4288  Entry.Node = Arg;
4289  Entry.Ty = ArgTy;
4290  Entry.IsSExt = false;
4291  Entry.IsZExt = false;
4292  Args.push_back(Entry);
4293 
4294  RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
4295  : RTLIB::SINCOS_STRET_F32;
4296  const char *LibcallName = getLibcallName(LC);
4297  SDValue Callee =
4298  DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
4299 
4300  StructType *RetTy = StructType::get(ArgTy, ArgTy);
4302  CLI.setDebugLoc(dl)
4303  .setChain(DAG.getEntryNode())
4304  .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
4305 
4306  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4307  return CallResult.first;
4308 }
4309 
4310 static MVT getSVEContainerType(EVT ContentTy);
4311 
4312 SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
4313  SelectionDAG &DAG) const {
4314  EVT OpVT = Op.getValueType();
4315  EVT ArgVT = Op.getOperand(0).getValueType();
4316 
4317  if (useSVEForFixedLengthVectorVT(OpVT))
4318  return LowerFixedLengthBitcastToSVE(Op, DAG);
4319 
4320  if (OpVT.isScalableVector()) {
4321  // Bitcasting between unpacked vector types of different element counts is
4322  // not a NOP because the live elements are laid out differently.
4323  // 01234567
4324  // e.g. nxv2i32 = XX??XX??
4325  // nxv4f16 = X?X?X?X?
4326  if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount())
4327  return SDValue();
4328 
4329  if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
4330  assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
4331  "Expected int->fp bitcast!");
4332  SDValue ExtResult =
4334  Op.getOperand(0));
4335  return getSVESafeBitCast(OpVT, ExtResult, DAG);
4336  }
4337  return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
4338  }
4339 
4340  if (OpVT != MVT::f16 && OpVT != MVT::bf16)
4341  return SDValue();
4342 
4343  // Bitcasts between f16 and bf16 are legal.
4344  if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
4345  return Op;
4346 
4347  assert(ArgVT == MVT::i16);
4348  SDLoc DL(Op);
4349 
4350  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
4351  Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
4352  return SDValue(
4353  DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
4354  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
4355  0);
4356 }
4357 
4358 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
4359  if (OrigVT.getSizeInBits() >= 64)
4360  return OrigVT;
4361 
4362  assert(OrigVT.isSimple() && "Expecting a simple value type");
4363 
4364  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
4365  switch (OrigSimpleTy) {
4366  default: llvm_unreachable("Unexpected Vector Type");
4367  case MVT::v2i8:
4368  case MVT::v2i16:
4369  return MVT::v2i32;
4370  case MVT::v4i8:
4371  return MVT::v4i16;
4372  }
4373 }
4374 
4376  const EVT &OrigTy,
4377  const EVT &ExtTy,
4378  unsigned ExtOpcode) {
4379  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
4380  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
4381  // 64-bits we need to insert a new extension so that it will be 64-bits.
4382  assert(ExtTy.is128BitVector() && "Unexpected extension size");
4383  if (OrigTy.getSizeInBits() >= 64)
4384  return N;
4385 
4386  // Must extend size to at least 64 bits to be used as an operand for VMULL.
4387  EVT NewVT = getExtensionTo64Bits(OrigTy);
4388 
4389  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
4390 }
4391 
4392 // Returns lane if Op extracts from a two-element vector and lane is constant
4393 // (i.e., extractelt(<2 x Ty> %v, ConstantLane)), and std::nullopt otherwise.
4394 static std::optional<uint64_t>
4396  SDNode *OpNode = Op.getNode();
4397  if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
4398  return std::nullopt;
4399 
4400  EVT VT = OpNode->getOperand(0).getValueType();
4401  ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));
4402  if (!VT.isFixedLengthVector() || VT.getVectorNumElements() != 2 || !C)
4403  return std::nullopt;
4404 
4405  return C->getZExtValue();
4406 }
4407 
4409  bool isSigned) {
4410  EVT VT = N->getValueType(0);
4411 
4412  if (N->getOpcode() != ISD::BUILD_VECTOR)
4413  return false;
4414 
4415  for (const SDValue &Elt : N->op_values()) {
4416  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
4417  unsigned EltSize = VT.getScalarSizeInBits();
4418  unsigned HalfSize = EltSize / 2;
4419  if (isSigned) {
4420  if (!isIntN(HalfSize, C->getSExtValue()))
4421  return false;
4422  } else {
4423  if (!isUIntN(HalfSize, C->getZExtValue()))
4424  return false;
4425  }
4426  continue;
4427  }
4428  return false;
4429  }
4430 
4431  return true;
4432 }
4433 
4435  if (N->getOpcode() == ISD::SIGN_EXTEND ||
4436  N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
4437  return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
4438  N->getOperand(0)->getValueType(0),
4439  N->getValueType(0),
4440  N->getOpcode());
4441 
4442  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
4443  EVT VT = N->getValueType(0);
4444  SDLoc dl(N);
4445  unsigned EltSize = VT.getScalarSizeInBits() / 2;
4446  unsigned NumElts = VT.getVectorNumElements();
4447  MVT TruncVT = MVT::getIntegerVT(EltSize);
4449  for (unsigned i = 0; i != NumElts; ++i) {
4450  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
4451  const APInt &CInt = C->getAPIntValue();
4452  // Element types smaller than 32 bits are not legal, so use i32 elements.
4453  // The values are implicitly truncated so sext vs. zext doesn't matter.
4454  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
4455  }
4456  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
4457 }
4458 
4459 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
4460  return N->getOpcode() == ISD::SIGN_EXTEND ||
4461  N->getOpcode() == ISD::ANY_EXTEND ||
4462  isExtendedBUILD_VECTOR(N, DAG, true);
4463 }
4464 
4465 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
4466  return N->getOpcode() == ISD::ZERO_EXTEND ||
4467  N->getOpcode() == ISD::ANY_EXTEND ||
4468  isExtendedBUILD_VECTOR(N, DAG, false);
4469 }
4470 
4471 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
4472  unsigned Opcode = N->getOpcode();
4473  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4474  SDNode *N0 = N->getOperand(0).getNode();
4475  SDNode *N1 = N->getOperand(1).getNode();
4476  return N0->hasOneUse() && N1->hasOneUse() &&
4477  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
4478  }
4479  return false;
4480 }
4481 
4482 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
4483  unsigned Opcode = N->getOpcode();
4484  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4485  SDNode *N0 = N->getOperand(0).getNode();
4486  SDNode *N1 = N->getOperand(1).getNode();
4487  return N0->hasOneUse() && N1->hasOneUse() &&
4488  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
4489  }
4490  return false;
4491 }
4492 
4493 SDValue AArch64TargetLowering::LowerGET_ROUNDING(SDValue Op,
4494  SelectionDAG &DAG) const {
4495  // The rounding mode is in bits 23:22 of the FPSCR.
4496  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
4497  // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
4498  // so that the shift + and get folded into a bitfield extract.
4499  SDLoc dl(Op);
4500 
4501  SDValue Chain = Op.getOperand(0);
4502  SDValue FPCR_64 = DAG.getNode(
4504  {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
4505  Chain = FPCR_64.getValue(1);
4506  SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
4507  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
4508  DAG.getConstant(1U << 22, dl, MVT::i32));
4509  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4510  DAG.getConstant(22, dl, MVT::i32));
4511  SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4512  DAG.getConstant(3, dl, MVT::i32));
4513  return DAG.getMergeValues({AND, Chain}, dl);
4514 }
4515 
4516 SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
4517  SelectionDAG &DAG) const {
4518  SDLoc DL(Op);
4519  SDValue Chain = Op->getOperand(0);
4520  SDValue RMValue = Op->getOperand(1);
4521 
4522  // The rounding mode is in bits 23:22 of the FPCR.
4523  // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
4524  // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
4525  // ((arg - 1) & 3) << 22).
4526  //
4527  // The argument of llvm.set.rounding must be within the segment [0, 3], so
4528  // NearestTiesToAway (4) is not handled here. It is responsibility of the code
4529  // generated llvm.set.rounding to ensure this condition.
4530 
4531  // Calculate new value of FPCR[23:22].
4532  RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
4533  DAG.getConstant(1, DL, MVT::i32));
4534  RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
4535  DAG.getConstant(0x3, DL, MVT::i32));
4536  RMValue =
4537  DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
4539  RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
4540 
4541  // Get current value of FPCR.
4542  SDValue Ops[] = {
4543  Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
4544  SDValue FPCR =
4546  Chain = FPCR.getValue(1);
4547  FPCR = FPCR.getValue(0);
4548 
4549  // Put new rounding mode into FPSCR[23:22].
4550  const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
4551  FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
4552  DAG.getConstant(RMMask, DL, MVT::i64));
4553  FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
4554  SDValue Ops2[] = {
4555  Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
4556  FPCR};
4557  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
4558 }
4559 
4560 static unsigned selectUmullSmull(SDNode *&N0, SDNode *&N1, SelectionDAG &DAG,
4561  SDLoc DL, bool &IsMLA) {
4562  bool IsN0SExt = isSignExtended(N0, DAG);
4563  bool IsN1SExt = isSignExtended(N1, DAG);
4564  if (IsN0SExt && IsN1SExt)
4565  return AArch64ISD::SMULL;
4566 
4567  bool IsN0ZExt = isZeroExtended(N0, DAG);
4568  bool IsN1ZExt = isZeroExtended(N1, DAG);
4569 
4570  if (IsN0ZExt && IsN1ZExt)
4571  return AArch64ISD::UMULL;
4572 
4573  // Select SMULL if we can replace zext with sext.
4574  if (((IsN0SExt && IsN1ZExt) || (IsN0ZExt && IsN1SExt)) &&
4575  !isExtendedBUILD_VECTOR(N0, DAG, false) &&
4576  !isExtendedBUILD_VECTOR(N1, DAG, false)) {
4577  SDValue ZextOperand;
4578  if (IsN0ZExt)
4579  ZextOperand = N0->getOperand(0);
4580  else
4581  ZextOperand = N1->getOperand(0);
4582  if (DAG.SignBitIsZero(ZextOperand)) {
4583  SDNode *NewSext =
4584  DAG.getSExtOrTrunc(ZextOperand, DL, N0->getValueType(0)).getNode();
4585  if (IsN0ZExt)
4586  N0 = NewSext;
4587  else
4588  N1 = NewSext;
4589  return AArch64ISD::SMULL;
4590  }
4591  }
4592 
4593  // Select UMULL if we can replace the other operand with an extend.
4594  if (IsN0ZExt || IsN1ZExt) {
4595  EVT VT = N0->getValueType(0);
4597  VT.getScalarSizeInBits() / 2);
4598  if (DAG.MaskedValueIsZero(SDValue(IsN0ZExt ? N1 : N0, 0), Mask)) {
4599  EVT HalfVT;
4600  switch (VT.getSimpleVT().SimpleTy) {
4601  case MVT::v2i64:
4602  HalfVT = MVT::v2i32;
4603  break;
4604  case MVT::v4i32:
4605  HalfVT = MVT::v4i16;
4606  break;
4607  case MVT::v8i16:
4608  HalfVT = MVT::v8i8;
4609  break;
4610  default:
4611  return 0;
4612  }
4613  // Truncate and then extend the result.
4614  SDValue NewExt = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
4615  SDValue(IsN0ZExt ? N1 : N0, 0));
4616  NewExt = DAG.getZExtOrTrunc(NewExt, DL, VT);
4617  if (IsN0ZExt)
4618  N1 = NewExt.getNode();
4619  else
4620  N0 = NewExt.getNode();
4621  return AArch64ISD::UMULL;
4622  }
4623  }
4624 
4625  if (!IsN1SExt && !IsN1ZExt)
4626  return 0;
4627 
4628  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
4629  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
4630  if (IsN1SExt && isAddSubSExt(N0, DAG)) {
4631  IsMLA = true;
4632  return AArch64ISD::SMULL;
4633  }
4634  if (IsN1ZExt && isAddSubZExt(N0, DAG)) {
4635  IsMLA = true;
4636  return AArch64ISD::UMULL;
4637  }
4638  if (IsN0ZExt && isAddSubZExt(N1, DAG)) {
4639  std::swap(N0, N1);
4640  IsMLA = true;
4641  return AArch64ISD::UMULL;
4642  }
4643  return 0;
4644 }
4645 
4646 SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
4647  EVT VT = Op.getValueType();
4648 
4649  // If SVE is available then i64 vector multiplications can also be made legal.
4650  bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64 ||
4651  Subtarget->forceStreamingCompatibleSVE();
4652 
4653  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
4654  return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
4655 
4656  // Multiplications are only custom-lowered for 128-bit vectors so that
4657  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
4658  assert(VT.is128BitVector() && VT.isInteger() &&
4659  "unexpected type for custom-lowering ISD::MUL");
4660  SDNode *N0 = Op.getOperand(0).getNode();
4661  SDNode *N1 = Op.getOperand(1).getNode();
4662  bool isMLA = false;
4663  SDLoc DL(Op);
4664  unsigned NewOpc = selectUmullSmull(N0, N1, DAG, DL, isMLA);
4665 
4666  if (!NewOpc) {
4667  if (VT == MVT::v2i64)
4668  // Fall through to expand this. It is not legal.
4669  return SDValue();
4670  else
4671  // Other vector multiplications are legal.
4672  return Op;
4673  }
4674 
4675  // Legalize to a S/UMULL instruction
4676  SDValue Op0;
4677  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
4678  if (!isMLA) {
4679  Op0 = skipExtensionForVectorMULL(N0, DAG);
4680  assert(Op0.getValueType().is64BitVector() &&
4681  Op1.getValueType().is64BitVector() &&
4682  "unexpected types for extended operands to VMULL");
4683  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
4684  }
4685  // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
4686  // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
4687  // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
4690  EVT Op1VT = Op1.getValueType();
4691  return DAG.getNode(N0->getOpcode(), DL, VT,
4692  DAG.getNode(NewOpc, DL, VT,
4693  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
4694  DAG.getNode(NewOpc, DL, VT,
4695  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
4696 }
4697 
4698 static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
4699  int Pattern) {
4701  return DAG.getConstant(1, DL, MVT::nxv1i1);
4702  return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
4704 }
4705 
4706 static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned,
4707  bool IsLess, bool IsEqual) {
4708  if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
4709  !isa<ConstantSDNode>(Op.getOperand(2)))
4710  return SDValue();
4711 
4712  SDLoc dl(Op);
4713  APInt X = Op.getConstantOperandAPInt(1);
4714  APInt Y = Op.getConstantOperandAPInt(2);
4715  APInt NumActiveElems;
4716  bool Overflow;
4717  if (IsLess)
4718  NumActiveElems = IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
4719  else
4720  NumActiveElems = IsSigned ? X.ssub_ov(Y, Overflow) : X.usub_ov(Y, Overflow);
4721 
4722  if (Overflow)
4723  return SDValue();
4724 
4725  if (IsEqual) {
4726  APInt One(NumActiveElems.getBitWidth(), 1, IsSigned);
4727  NumActiveElems = IsSigned ? NumActiveElems.sadd_ov(One, Overflow)
4728  : NumActiveElems.uadd_ov(One, Overflow);
4729  if (Overflow)
4730  return SDValue();
4731  }
4732 
4733  std::optional<unsigned> PredPattern =
4735  unsigned MinSVEVectorSize = std::max(
4737  unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
4738  if (PredPattern != std::nullopt &&
4739  NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
4740  return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
4741 
4742  return SDValue();
4743 }
4744 
4745 // Returns a safe bitcast between two scalable vector predicates, where
4746 // any newly created lanes from a widening bitcast are defined as zero.
4748  SDLoc DL(Op);
4749  EVT InVT = Op.getValueType();
4750 
4751  assert(InVT.getVectorElementType() == MVT::i1 &&
4752  VT.getVectorElementType() == MVT::i1 &&
4753  "Expected a predicate-to-predicate bitcast");
4755  InVT.isScalableVector() &&
4756  DAG.getTargetLoweringInfo().isTypeLegal(InVT) &&
4757  "Only expect to cast between legal scalable predicate types!");
4758 
4759  // Return the operand if the cast isn't changing type,
4760  // e.g. <n x 16 x i1> -> <n x 16 x i1>
4761  if (InVT == VT)
4762  return Op;
4763 
4764  SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
4765 
4766  // We only have to zero the lanes if new lanes are being defined, e.g. when
4767  // casting from <vscale x 2 x i1> to <vscale x 16 x i1>. If this is not the
4768  // case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
4769  // we can return here.
4770  if (InVT.bitsGT(VT))
4771  return Reinterpret;
4772 
4773  // Check if the other lanes are already known to be zeroed by
4774  // construction.
4776  return Reinterpret;
4777 
4778  // Zero the newly introduced lanes.
4779  SDValue Mask = DAG.getConstant(1, DL, InVT);
4781  return DAG.getNode(ISD::AND, DL, VT, Reinterpret, Mask);
4782 }
4783 
4784 SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain,
4786  EVT VT) const {
4787  if (Attrs.hasStreamingInterfaceOrBody())
4788  return DAG.getConstant(1, DL, VT);
4789 
4790  if (Attrs.hasNonStreamingInterfaceAndBody())
4791  return DAG.getConstant(0, DL, VT);
4792 
4793  assert(Attrs.hasStreamingCompatibleInterface() && "Unexpected interface");
4794 
4795  SDValue Callee = DAG.getExternalSymbol("__arm_sme_state",
4796  getPointerTy(DAG.getDataLayout()));
4797  Type *Int64Ty = Type::getInt64Ty(*DAG.getContext());
4798  Type *RetTy = StructType::get(Int64Ty, Int64Ty);
4800  ArgListTy Args;
4801  CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
4803  RetTy, Callee, std::move(Args));
4804  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4805  SDValue Mask = DAG.getConstant(/*PSTATE.SM*/ 1, DL, MVT::i64);
4806  return DAG.getNode(ISD::AND, DL, MVT::i64, CallResult.first.getOperand(0),
4807  Mask);
4808 }
4809 
4810 static std::optional<SMEAttrs> getCalleeAttrsFromExternalFunction(SDValue V) {
4811  if (auto *ES = dyn_cast<ExternalSymbolSDNode>(V)) {
4812  StringRef S(ES->getSymbol());
4813  if (S == "__arm_sme_state" || S == "__arm_tpidr2_save")
4815  if (S == "__arm_tpidr2_restore")
4817  }
4818  return std::nullopt;
4819 }
4820 
4821 SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
4822  SelectionDAG &DAG) const {
4823  unsigned IntNo = Op.getConstantOperandVal(1);
4824  SDLoc DL(Op);
4825  switch (IntNo) {
4826  default:
4827  return SDValue(); // Don't custom lower most intrinsics.
4828  case Intrinsic::aarch64_prefetch: {
4829  SDValue Chain = Op.getOperand(0);
4830  SDValue Addr = Op.getOperand(2);
4831 
4832  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
4833  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4834  unsigned IsStream = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
4835  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
4836  unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
4837  (!IsData << 3) | // IsDataCache bit
4838  (Locality << 1) | // Cache level bits
4839  (unsigned)IsStream; // Stream bit
4840 
4841  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
4842  DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr);
4843  }
4844  case Intrinsic::aarch64_sme_za_enable:
4845  return DAG.getNode(
4847  Op->getOperand(0), // Chain
4848  DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
4849  DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
4850  case Intrinsic::aarch64_sme_za_disable:
4851  return DAG.getNode(
4853  Op->getOperand(0), // Chain
4854  DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
4855  DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
4856  }
4857 }
4858 
4859 SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
4860  SelectionDAG &DAG) const {
4861  unsigned IntNo = Op.getConstantOperandVal(1);
4862  SDLoc DL(Op);
4863  switch (IntNo) {
4864  default:
4865  return SDValue(); // Don't custom lower most intrinsics.
4866  case Intrinsic::aarch64_mops_memset_tag: {
4867  auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
4868  SDValue Chain = Node->getChain();
4869  SDValue Dst = Op.getOperand(2);
4870  SDValue Val = Op.getOperand(3);
4871  Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
4872  SDValue Size = Op.getOperand(4);
4873  auto Alignment = Node->getMemOperand()->getAlign();
4874  bool IsVol = Node->isVolatile();
4875  auto DstPtrInfo = Node->getPointerInfo();
4876 
4877  const auto &SDI =
4878  static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
4879  SDValue MS =
4880  SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
4881  Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
4882 
4883  // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
4884  // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
4885  // LowerOperationWrapper will complain that the number of results has
4886  // changed.
4887  return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
4888  }
4889  }
4890 }
4891 
4892 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
4893  SelectionDAG &DAG) const {
4894  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4895  SDLoc dl(Op);
4896  switch (IntNo) {
4897  default: return SDValue(); // Don't custom lower most intrinsics.
4898  case Intrinsic::thread_pointer: {
4899  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4900  return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
4901  }
4902  case Intrinsic::aarch64_neon_abs: {
4903  EVT Ty = Op.getValueType();
4904  if (Ty == MVT::i64) {
4906  Op.getOperand(1));
4907  Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
4908  return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
4909  } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
4910  return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
4911  } else {
4912  report_fatal_error("Unexpected type for AArch64 NEON intrinic");
4913  }
4914  }
4915  case Intrinsic::aarch64_neon_pmull64: {
4916  SDValue LHS = Op.getOperand(1);
4917  SDValue RHS = Op.getOperand(2);
4918 
4919  std::optional<uint64_t> LHSLane =
4921  std::optional<uint64_t> RHSLane =
4923 
4924  assert((!LHSLane || *LHSLane < 2) && "Expect lane to be None or 0 or 1");
4925  assert((!RHSLane || *RHSLane < 2) && "Expect lane to be None or 0 or 1");
4926 
4927  // 'aarch64_neon_pmull64' takes i64 parameters; while pmull/pmull2
4928  // instructions execute on SIMD registers. So canonicalize i64 to v1i64,
4929  // which ISel recognizes better. For example, generate a ldr into d*
4930  // registers as opposed to a GPR load followed by a fmov.
4931  auto TryVectorizeOperand = [](SDValue N, std::optional<uint64_t> NLane,
4932  std::optional<uint64_t> OtherLane,
4933  const SDLoc &dl,
4934  SelectionDAG &DAG) -> SDValue {
4935  // If the operand is an higher half itself, rewrite it to
4936  // extract_high_v2i64; this way aarch64_neon_pmull64 could
4937  // re-use the dag-combiner function with aarch64_neon_{pmull,smull,umull}.
4938  if (NLane && *NLane == 1)
4939  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i64,
4940  N.getOperand(0), DAG.getConstant(1, dl, MVT::i64));
4941 
4942  // Operand N is not a higher half but the other operand is.
4943  if (OtherLane && *OtherLane == 1) {
4944  // If this operand is a lower half, rewrite it to
4945  // extract_high_v2i64(duplane(<2 x Ty>, 0)). This saves a roundtrip to
4946  // align lanes of two operands. A roundtrip sequence (to move from lane
4947  // 1 to lane 0) is like this:
4948  // mov x8, v0.d[1]
4949  // fmov d0, x8
4950  if (NLane && *NLane == 0)
4951  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i64,
4953  N.getOperand(0),
4954  DAG.getConstant(0, dl, MVT::i64)),
4955  DAG.getConstant(1, dl, MVT::i64));
4956 
4957  // Otherwise just dup from main to all lanes.
4958  return DAG.getNode(AArch64ISD::DUP, dl, MVT::v1i64, N);
4959  }
4960 
4961  // Neither operand is an extract of higher half, so codegen may just use
4962  // the non-high version of PMULL instruction. Use v1i64 to represent i64.
4963  assert(N.getValueType() == MVT::i64 &&
4964  "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
4965  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, N);
4966  };
4967 
4968  LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, dl, DAG);
4969  RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, dl, DAG);
4970 
4971  return DAG.getNode(AArch64ISD::PMULL, dl, Op.getValueType(), LHS, RHS);
4972  }
4973  case Intrinsic::aarch64_neon_smax:
4974  return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
4975  Op.getOperand(1), Op.getOperand(2));
4976  case Intrinsic::aarch64_neon_umax:
4977  return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
4978  Op.getOperand(1), Op.getOperand(2));
4979  case Intrinsic::aarch64_neon_smin:
4980  return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
4981  Op.getOperand(1), Op.getOperand(2));
4982  case Intrinsic::aarch64_neon_umin:
4983  return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
4984  Op.getOperand(1), Op.getOperand(2));
4985  case Intrinsic::aarch64_neon_scalar_sqxtn:
4986  case Intrinsic::aarch64_neon_scalar_sqxtun:
4987  case Intrinsic::aarch64_neon_scalar_uqxtn: {
4988  assert(Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::f32);
4989  if (Op.getValueType() == MVT::i32)
4990  return DAG.getNode(ISD::BITCAST, dl, MVT::i32,
4992  Op.getOperand(0),
4993  DAG.getNode(ISD::BITCAST, dl, MVT::f64,
4994  Op.getOperand(1))));
4995  return SDValue();
4996  }
4997  case Intrinsic::aarch64_sve_whilelo:
4998  return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
4999  /*IsEqual=*/false);
5000  case Intrinsic::aarch64_sve_whilelt:
5001  return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
5002  /*IsEqual=*/false);
5003  case Intrinsic::aarch64_sve_whilels:
5004  return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
5005  /*IsEqual=*/true);
5006  case Intrinsic::aarch64_sve_whilele:
5007  return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
5008  /*IsEqual=*/true);
5009  case Intrinsic::aarch64_sve_whilege:
5010  return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
5011  /*IsEqual=*/true);
5012  case Intrinsic::aarch64_sve_whilegt:
5013  return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
5014  /*IsEqual=*/false);
5015  case Intrinsic::aarch64_sve_whilehs:
5016  return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
5017  /*IsEqual=*/true);
5018  case Intrinsic::aarch64_sve_whilehi:
5019  return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
5020  /*IsEqual=*/false);
5021  case Intrinsic::aarch64_sve_sunpkhi:
5022  return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
5023  Op.getOperand(1));
5024  case Intrinsic::aarch64_sve_sunpklo:
5025  return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
5026  Op.getOperand(1));
5027  case Intrinsic::aarch64_sve_uunpkhi:
5028  return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
5029  Op.getOperand(1));
5030  case Intrinsic::aarch64_sve_uunpklo:
5031  return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
5032  Op.getOperand(1));
5033  case Intrinsic::aarch64_sve_clasta_n:
5034  return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
5035  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5036  case Intrinsic::aarch64_sve_clastb_n:
5037  return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
5038  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5039  case Intrinsic::aarch64_sve_lasta:
5040  return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
5041  Op.getOperand(1), Op.getOperand(2));
5042  case Intrinsic::aarch64_sve_lastb:
5043  return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
5044  Op.getOperand(1), Op.getOperand(2));
5045  case Intrinsic::aarch64_sve_rev:
5046  return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
5047  Op.getOperand(1));
5048  case Intrinsic::aarch64_sve_tbl:
5049  return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
5050  Op.getOperand(1), Op.getOperand(2));
5051  case Intrinsic::aarch64_sve_trn1:
5052  return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
5053  Op.getOperand(1), Op.getOperand(2));
5054  case Intrinsic::aarch64_sve_trn2:
5055  return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
5056  Op.getOperand(1), Op.getOperand(2));
5057  case Intrinsic::aarch64_sve_uzp1:
5058  return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
5059  Op.getOperand(1), Op.getOperand(2));
5060  case Intrinsic::aarch64_sve_uzp2:
5061  return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
5062  Op.getOperand(1), Op.getOperand(2));
5063  case Intrinsic::aarch64_sve_zip1:
5064  return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
5065  Op.getOperand(1), Op.getOperand(2));
5066  case Intrinsic::aarch64_sve_zip2:
5067  return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
5068  Op.getOperand(1), Op.getOperand(2));
5069  case Intrinsic::aarch64_sve_splice:
5070  return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
5071  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5072  case Intrinsic::aarch64_sve_ptrue:
5073  return getPTrue(DAG, dl, Op.getValueType(),
5074  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
5075  case Intrinsic::aarch64_sve_clz:
5076  return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
5077  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5078  case Intrinsic::aarch64_sme_cntsb:
5079  return DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
5080  DAG.getConstant(1, dl, MVT::i32));
5081  case Intrinsic::aarch64_sme_cntsh: {
5082  SDValue One = DAG.getConstant(1, dl, MVT::i32);
5083  SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), One);
5084  return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, One);
5085  }
5086  case Intrinsic::aarch64_sme_cntsw: {
5087  SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
5088  DAG.getConstant(1, dl, MVT::i32));
5089  return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
5090  DAG.getConstant(2, dl, MVT::i32));
5091  }
5092  case Intrinsic::aarch64_sme_cntsd: {
5093  SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
5094  DAG.getConstant(1, dl, MVT::i32));
5095  return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
5096  DAG.getConstant(3, dl, MVT::i32));
5097  }
5098  case Intrinsic::aarch64_sve_cnt: {
5099  SDValue Data = Op.getOperand(3);
5100  // CTPOP only supports integer operands.
5101  if (Data.getValueType().isFloatingPoint())
5102  Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
5103  return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
5104  Op.getOperand(2), Data, Op.getOperand(1));
5105  }
5106  case Intrinsic::aarch64_sve_dupq_lane:
5107  return LowerDUPQLane(Op, DAG);
5108  case Intrinsic::aarch64_sve_convert_from_svbool:
5109  return getSVEPredicateBitCast(Op.getValueType(), Op.getOperand(1), DAG);
5110  case Intrinsic::aarch64_sve_convert_to_svbool:
5111  return getSVEPredicateBitCast(MVT::nxv16i1, Op.getOperand(1), DAG);
5112  case Intrinsic::aarch64_sve_fneg:
5113  return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
5114  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5115  case Intrinsic::aarch64_sve_frintp:
5116  return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
5117  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5118  case Intrinsic::aarch64_sve_frintm:
5119  return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
5120  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5121  case Intrinsic::aarch64_sve_frinti:
5122  return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
5123  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5124  case Intrinsic::aarch64_sve_frintx:
5125  return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
5126  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5127  case Intrinsic::aarch64_sve_frinta:
5128  return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
5129  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5130  case Intrinsic::aarch64_sve_frintn:
5131  return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
5132  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5133  case Intrinsic::aarch64_sve_frintz:
5134  return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
5135  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5136  case Intrinsic::aarch64_sve_ucvtf:
5138  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
5139  Op.getOperand(1));
5140  case Intrinsic::aarch64_sve_scvtf:
5142  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
5143  Op.getOperand(1));
5144  case Intrinsic::aarch64_sve_fcvtzu:
5146  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
5147  Op.getOperand(1));
5148  case Intrinsic::aarch64_sve_fcvtzs:
5150  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
5151  Op.getOperand(1));
5152  case Intrinsic::aarch64_sve_fsqrt:
5153  return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
5154  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5155  case Intrinsic::aarch64_sve_frecpx:
5156  return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
5157  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5158  case Intrinsic::aarch64_sve_frecpe_x:
5159  return DAG.getNode(AArch64ISD::FRECPE, dl, Op.getValueType(),
5160  Op.getOperand(1));
5161  case Intrinsic::aarch64_sve_frecps_x:
5162  return DAG.getNode(AArch64ISD::FRECPS, dl, Op.getValueType(),
5163  Op.getOperand(1), Op.getOperand(2));
5164  case Intrinsic::aarch64_sve_frsqrte_x:
5165  return DAG.getNode(AArch64ISD::FRSQRTE, dl, Op.getValueType(),
5166  Op.getOperand(1));
5167  case Intrinsic::aarch64_sve_frsqrts_x:
5168  return DAG.getNode(AArch64ISD::FRSQRTS, dl, Op.getValueType(),
5169  Op.getOperand(1), Op.getOperand(2));
5170  case Intrinsic::aarch64_sve_fabs:
5171  return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
5172  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5173  case Intrinsic::aarch64_sve_abs:
5174  return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
5175  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5176  case Intrinsic::aarch64_sve_neg:
5177  return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
5178  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5179  case Intrinsic::aarch64_sve_insr: {
5180  SDValue Scalar = Op.getOperand(2);
5181  EVT ScalarTy = Scalar.getValueType();
5182  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
5183  Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
5184 
5185  return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
5186  Op.getOperand(1), Scalar);
5187  }
5188  case Intrinsic::aarch64_sve_rbit:
5190  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
5191  Op.getOperand(1));
5192  case Intrinsic::aarch64_sve_revb:
5193  return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
5194  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5195  case Intrinsic::aarch64_sve_revh:
5196  return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, dl, Op.getValueType(),
5197  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5198  case Intrinsic::aarch64_sve_revw:
5199  return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(),
5200  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5201  case Intrinsic::aarch64_sve_revd:
5202  return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, dl, Op.getValueType(),
5203  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
5204  case Intrinsic::aarch64_sve_sxtb:
5205  return DAG.getNode(
5207  Op.getOperand(2), Op.getOperand(3),
5208  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
5209  Op.getOperand(1));
5210  case Intrinsic::aarch64_sve_sxth:
5211  return DAG.getNode(
5213  Op.getOperand(2), Op.getOperand(3),
5214  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
5215  Op.getOperand(1));
5216  case Intrinsic::aarch64_sve_sxtw:
5217  return DAG.getNode(
5219  Op.getOperand(2), Op.getOperand(3),
5220  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
5221  Op.getOperand(1));
5222  case Intrinsic::aarch64_sve_uxtb:
5223  return DAG.getNode(
5225  Op.getOperand(2), Op.getOperand(3),
5226  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
5227  Op.getOperand(1));
5228  case Intrinsic::aarch64_sve_uxth:
5229  return DAG.getNode(
5231  Op.getOperand(2), Op.getOperand(3),
5232  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
5233  Op.getOperand(1));
5234  case Intrinsic::aarch64_sve_uxtw:
5235  return DAG.getNode(
5237  Op.getOperand(2), Op.getOperand(3),
5238  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
5239  Op.getOperand(1));
5240  case Intrinsic::localaddress: {
5241  const auto &MF = DAG.getMachineFunction();
5242  const auto *RegInfo = Subtarget->getRegisterInfo();
5243  unsigned Reg = RegInfo->getLocalAddressRegister(MF);
5244  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
5245  Op.getSimpleValueType());
5246  }
5247 
5248  case Intrinsic::eh_recoverfp: {
5249  // FIXME: This needs to be implemented to correctly handle highly aligned
5250  // stack objects. For now we simply return the incoming FP. Refer D53541
5251  // for more details.
5252  SDValue FnOp = Op.getOperand(1);
5253  SDValue IncomingFPOp = Op.getOperand(2);
5254  GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
5255  auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
5256  if (!Fn)
5258  "llvm.eh.recoverfp must take a function as the first argument");
5259  return IncomingFPOp;
5260  }
5261 
5262  case Intrinsic::aarch64_neon_vsri:
5263  case Intrinsic::aarch64_neon_vsli: {
5264  EVT Ty = Op.getValueType();
5265 
5266  if (!Ty.isVector())
5267  report_fatal_error("Unexpected type for aarch64_neon_vsli");
5268 
5269  assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
5270 
5271  bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
5272  unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
5273  return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
5274  Op.getOperand(3));
5275  }
5276 
5277  case Intrinsic::aarch64_neon_srhadd:
5278  case Intrinsic::aarch64_neon_urhadd:
5279  case Intrinsic::aarch64_neon_shadd:
5280  case Intrinsic::aarch64_neon_uhadd: {
5281  bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
5282  IntNo == Intrinsic::aarch64_neon_shadd);
5283  bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
5284  IntNo == Intrinsic::aarch64_neon_urhadd);
5285  unsigned Opcode = IsSignedAdd
5286  ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
5287  : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
5288  return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
5289  Op.getOperand(2));
5290  }
5291  case Intrinsic::aarch64_neon_saddlp:
5292  case Intrinsic::aarch64_neon_uaddlp: {
5293  unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
5296  return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
5297  }
5298  case Intrinsic::aarch64_neon_sdot:
5299  case Intrinsic::aarch64_neon_udot:
5300  case Intrinsic::aarch64_sve_sdot:
5301  case Intrinsic::aarch64_sve_udot: {
5302  unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
5303  IntNo == Intrinsic::aarch64_sve_udot)
5305  : AArch64ISD::SDOT;
5306  return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
5307  Op.getOperand(2), Op.getOperand(3));
5308  }
5309  case Intrinsic::get_active_lane_mask: {
5310  SDValue ID =
5311  DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64);
5312  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
5313  Op.getOperand(1), Op.getOperand(2));
5314  }
5315  }
5316 }
5317 
5318 bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
5319  if (VT.getVectorElementType() == MVT::i8 ||
5320  VT.getVectorElementType() == MVT::i16) {
5321  EltTy = MVT::i32;
5322  return true;
5323  }
5324  return false;
5325 }
5326 
5327 bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
5328  EVT DataVT) const {
5329  // SVE only supports implicit extension of 32-bit indices.
5330  if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
5331  return false;
5332 
5333  // Indices cannot be smaller than the main data type.
5334  if (IndexVT.getScalarSizeInBits() < DataVT.getScalarSizeInBits())
5335  return false;
5336 
5337  // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit
5338  // element container type, which would violate the previous clause.
5339  return DataVT.isFixedLengthVector() || DataVT.getVectorMinNumElements() > 2;
5340 }
5341 
5342 bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
5343  return ExtVal.getValueType().isScalableVector() ||
5345  ExtVal.getValueType(),
5346  /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors());
5347 }
5348 
5349 unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
5350  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
5351  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
5353  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
5355  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
5357  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
5359  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
5361  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
5363  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
5365  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
5367  };
5368  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
5369  return AddrModes.find(Key)->second;
5370 }
5371 
5372 unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
5373  switch (Opcode) {
5374  default:
5375  llvm_unreachable("unimplemented opcode");
5376  return Opcode;
5391  }
5392 }
5393 
5394 SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
5395  SelectionDAG &DAG) const {
5396  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
5397 
5398  SDLoc DL(Op);
5399  SDValue Chain = MGT->getChain();
5400  SDValue PassThru = MGT->getPassThru();
5401  SDValue Mask = MGT->getMask();
5402  SDValue BasePtr = MGT->getBasePtr();
5403  SDValue Index = MGT->getIndex();
5404  SDValue Scale = MGT->getScale();
5405  EVT VT = Op.getValueType();
5406  EVT MemVT = MGT->getMemoryVT();
5407  ISD::LoadExtType ExtType = MGT->getExtensionType();
5408  ISD::MemIndexType IndexType = MGT->getIndexType();
5409 
5410  // SVE supports zero (and so undef) passthrough values only, everything else
5411  // must be handled manually by an explicit select on the load's output.
5412  if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
5413  SDValue Ops[] = {Chain, DAG.getUNDEF(VT), Mask, BasePtr, Index, Scale};
5414  SDValue Load =
5415  DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
5416  MGT->getMemOperand(), IndexType, ExtType);
5417  SDValue Select = DAG.getSelect(DL, VT, Mask, Load, PassThru);
5418  return DAG.getMergeValues({Select, Load.getValue(1)}, DL);
5419  }
5420 
5421  bool IsScaled = MGT->isIndexScaled();
5422  bool IsSigned = MGT->isIndexSigned();
5423 
5424  // SVE supports an index scaled by sizeof(MemVT.elt) only, everything else
5425  // must be calculated before hand.
5426  uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
5427  if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
5428  assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
5429  EVT IndexVT = Index.getValueType();
5430  Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
5431  DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
5432  Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
5433 
5434  SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
5435  return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
5436  MGT->getMemOperand(), IndexType, ExtType);
5437  }
5438 
5439  // Lower fixed length gather to a scalable equivalent.
5440  if (VT.isFixedLengthVector()) {
5441  assert(Subtarget->useSVEForFixedLengthVectors() &&
5442  "Cannot lower when not using SVE for fixed vectors!");
5443 
5444  // NOTE: Handle floating-point as if integer then bitcast the result.
5445  EVT DataVT = VT.changeVectorElementTypeToInteger();
5446  MemVT = MemVT.changeVectorElementTypeToInteger();
5447 
5448  // Find the smallest integer fixed length vector we can use for the gather.
5449  EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
5450  if (DataVT.getVectorElementType() == MVT::i64 ||
5451  Index.getValueType().getVectorElementType() == MVT::i64 ||
5452  Mask.getValueType().getVectorElementType() == MVT::i64)
5453  PromotedVT = VT.changeVectorElementType(MVT::i64);
5454 
5455  // Promote vector operands except for passthrough, which we know is either
5456  // undef or zero, and thus best constructed directly.
5457  unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5458  Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
5459  Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
5460 
5461  // A promoted result type forces the need for an extending load.
5462  if (PromotedVT != DataVT && ExtType == ISD::NON_EXTLOAD)
5463  ExtType = ISD::EXTLOAD;
5464 
5465  EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
5466 
5467  // Convert fixed length vector operands to scalable.
5468  MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
5469  Index = convertToScalableVector(DAG, ContainerVT, Index);
5471  PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
5472  : DAG.getConstant(0, DL, ContainerVT);
5473 
5474  // Emit equivalent scalable vector gather.
5475  SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
5476  SDValue Load =
5477  DAG.getMaskedGather(DAG.getVTList(ContainerVT, MVT::Other), MemVT, DL,
5478  Ops, MGT->getMemOperand(), IndexType, ExtType);
5479 
5480  // Extract fixed length data then convert to the required result type.
5481  SDValue Result = convertFromScalableVector(DAG, PromotedVT, Load);
5482  Result = DAG.getNode(ISD::TRUNCATE, DL, DataVT, Result);
5483  if (VT.isFloatingPoint())
5484  Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
5485 
5486  return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
5487  }
5488 
5489  // Everything else is legal.
5490  return Op;
5491 }
5492 
5493 SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
5494  SelectionDAG &DAG) const {
5495  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
5496 
5497  SDLoc DL(Op);
5498  SDValue Chain = MSC->getChain();
5499  SDValue StoreVal = MSC->getValue();
5500  SDValue Mask = MSC->getMask();
5501  SDValue BasePtr = MSC->getBasePtr();
5502  SDValue Index = MSC->getIndex();
5503  SDValue Scale = MSC->getScale();
5504  EVT VT = StoreVal.getValueType();
5505  EVT MemVT = MSC->getMemoryVT();
5506  ISD::MemIndexType IndexType = MSC->getIndexType();
5507  bool Truncating = MSC->isTruncatingStore();
5508 
5509  bool IsScaled = MSC->isIndexScaled();
5510  bool IsSigned = MSC->isIndexSigned();
5511 
5512  // SVE supports an index scaled by sizeof(MemVT.elt) only, everything else
5513  // must be calculated before hand.
5514  uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
5515  if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
5516  assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
5517  EVT IndexVT = Index.getValueType();
5518  Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
5519  DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
5520  Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
5521 
5522  SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
5523  return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
5524  MSC->getMemOperand(), IndexType, Truncating);
5525  }
5526 
5527  // Lower fixed length scatter to a scalable equivalent.
5528  if (VT.isFixedLengthVector()) {
5529  assert(Subtarget->useSVEForFixedLengthVectors() &&
5530  "Cannot lower when not using SVE for fixed vectors!");
5531 
5532  // Once bitcast we treat floating-point scatters as if integer.
5533  if (VT.isFloatingPoint()) {
5535  MemVT = MemVT.changeVectorElementTypeToInteger();
5536  StoreVal = DAG.getNode(ISD::BITCAST, DL, VT, StoreVal);
5537  }
5538 
5539  // Find the smallest integer fixed length vector we can use for the scatter.
5540  EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
5541  if (VT.getVectorElementType() == MVT::i64 ||
5542  Index.getValueType().getVectorElementType() == MVT::i64 ||
5543  Mask.getValueType().getVectorElementType() == MVT::i64)
5544  PromotedVT = VT.changeVectorElementType(MVT::i64);
5545 
5546  // Promote vector operands.
5547  unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5548  Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
5549  Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
5550  StoreVal = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, StoreVal);
5551 
5552  // A promoted value type forces the need for a truncating store.
5553  if (PromotedVT != VT)
5554  Truncating = true;
5555 
5556  EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
5557 
5558  // Convert fixed length vector operands to scalable.
5559  MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
5560  Index = convertToScalableVector(DAG, ContainerVT, Index);
5562  StoreVal = convertToScalableVector(DAG, ContainerVT, StoreVal);
5563 
5564  // Emit equivalent scalable vector scatter.
5565  SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
5566  return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
5567  MSC->getMemOperand(), IndexType, Truncating);
5568  }
5569 
5570  // Everything else is legal.
5571  return Op;
5572 }
5573 
5574 SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
5575  SDLoc DL(Op);
5576  MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
5577  assert(LoadNode && "Expected custom lowering of a masked load node");
5578  EVT VT = Op->getValueType(0);
5579 
5581  VT,
5582  /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
5583  return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
5584 
5585  SDValue PassThru = LoadNode->getPassThru();
5586  SDValue Mask = LoadNode->getMask();
5587 
5588  if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
5589  return Op;
5590 
5591  SDValue Load = DAG.getMaskedLoad(
5592  VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
5593  LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
5594  LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
5595  LoadNode->getExtensionType());
5596 
5597  SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
5598 
5599  return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
5600 }
5601 
5602 // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
5604  EVT VT, EVT MemVT,
5605  SelectionDAG &DAG) {
5606  assert(VT.isVector() && "VT should be a vector type");
5607  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
5608 
5609  SDValue Value = ST->getValue();
5610 
5611  // It first extend the promoted v4i16 to v8i16, truncate to v8i8, and extract
5612  // the word lane which represent the v4i8 subvector. It optimizes the store
5613  // to:
5614  //
5615  // xtn v0.8b, v0.8h
5616  // str s0, [x0]
5617 
5618  SDValue Undef = DAG.getUNDEF(MVT::i16);
5619  SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
5620  {Undef, Undef, Undef, Undef});
5621 
5622  SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
5623  Value, UndefVec);
5624  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
5625 
5626  Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
5627  SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
5628  Trunc, DAG.getConstant(0, DL, MVT::i64));
5629 
5630  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
5631  ST->getBasePtr(), ST->getMemOperand());
5632 }
5633 
5634 // Custom lowering for any store, vector or scalar and/or default or with
5635 // a truncate operations. Currently only custom lower truncate operation
5636 // from vector v4i16 to v4i8 or volatile stores of i128.
5637 SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
5638  SelectionDAG &DAG) const {
5639  SDLoc Dl(Op);
5640  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
5641  assert (StoreNode && "Can only custom lower store nodes");
5642 
5643  SDValue Value = StoreNode->getValue();
5644 
5645  EVT VT = Value.getValueType();
5646  EVT MemVT = StoreNode->getMemoryVT();
5647 
5648  if (VT.isVector()) {
5650  VT,
5651  /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
5652  return LowerFixedLengthVectorStoreToSVE(Op, DAG);
5653 
5654  unsigned AS = StoreNode->getAddressSpace();
5655  Align Alignment = StoreNode->getAlign();
5656  if (Alignment < MemVT.getStoreSize() &&
5657  !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
5658  StoreNode->getMemOperand()->getFlags(),
5659  nullptr)) {
5660  return scalarizeVectorStore(StoreNode, DAG);
5661  }
5662 
5663  if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
5664  MemVT == MVT::v4i8) {
5665  return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
5666  }
5667  // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
5668  // the custom lowering, as there are no un-paired non-temporal stores and
5669  // legalization will break up 256 bit inputs.
5671  if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
5672  EC.isKnownEven() &&
5673  ((MemVT.getScalarSizeInBits() == 8u ||
5674  MemVT.getScalarSizeInBits() == 16u ||
5675  MemVT.getScalarSizeInBits() == 32u ||
5676  MemVT.getScalarSizeInBits() == 64u))) {
5677  SDValue Lo =
5679  MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
5680  StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
5681  SDValue Hi =
5683  MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
5684  StoreNode->getValue(),
5685  DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
5688  {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
5689  StoreNode->getMemoryVT(), StoreNode->getMemOperand());
5690  return Result;
5691  }
5692  } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
5693  return LowerStore128(Op, DAG);
5694  } else if (MemVT == MVT::i64x8) {
5695  SDValue Value = StoreNode->getValue();
5696  assert(Value->getValueType(0) == MVT::i64x8);
5697  SDValue Chain = StoreNode->getChain();
5698  SDValue Base = StoreNode->getBasePtr();
5699  EVT PtrVT = Base.getValueType();
5700  for (unsigned i = 0; i < 8; i++) {
5702  Value, DAG.getConstant(i, Dl, MVT::i32));
5703  SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
5704  DAG.getConstant(i * 8, Dl, PtrVT));
5705  Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
5706  StoreNode->getOriginalAlign());
5707  }
5708  return Chain;
5709  }
5710 
5711  return SDValue();
5712 }
5713 
/// Lower atomic or volatile 128-bit stores to a single STP instruction
/// (or STILP when the store is release-ordered on targets with RCPC3).
///
/// The i128 value is split into its low/high 64-bit halves, which become
/// the two register operands of the paired-store node.
SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
                                             SelectionDAG &DAG) const {
  MemSDNode *StoreNode = cast<MemSDNode>(Op);
  assert(StoreNode->getMemoryVT() == MVT::i128);
  assert(StoreNode->isVolatile() || StoreNode->isAtomic());

  // NOTE(review): the initializer of IsStoreRelease is elided in this
  // extract; presumably it checks the store's merged atomic ordering for
  // Release -- confirm against the full source.
  bool IsStoreRelease =
  if (StoreNode->isAtomic())
    assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
            Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
           StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
           // NOTE(review): the final disjunct of this assert is elided here.

  // An ISD::STORE node carries the stored value in operand 1; an
  // ATOMIC_STORE carries it in operand 2.
  SDValue Value = StoreNode->getOpcode() == ISD::STORE
                      ? StoreNode->getOperand(1)
                      : StoreNode->getOperand(2);
  SDLoc DL(Op);
  // NOTE(review): the lines defining Lo/Hi (extracting elements 0 and 1 of
  // Value) are partially elided in this extract.
                           DAG.getConstant(0, DL, MVT::i64));
                           DAG.getConstant(1, DL, MVT::i64));

  unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
  // NOTE(review): the memory-intrinsic-node call defining Result is
  // partially elided here; its operands are {Chain, Lo, Hi, BasePtr}.
      Opcode, DL, DAG.getVTList(MVT::Other),
      {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
      StoreNode->getMemoryVT(), StoreNode->getMemOperand());
  return Result;
}
5745 
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
                                         SelectionDAG &DAG) const {
  // Custom load lowering: i64x8 (LS64) loads are expanded into eight i64
  // loads feeding an LS64_BUILD node; extending v4i8 loads are widened via
  // a scalar f32 load plus vector extends.
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  assert(LoadNode && "Expected custom lowering of a load node");

  if (LoadNode->getMemoryVT() == MVT::i64x8) {
    // NOTE(review): the declaration of Ops (a vector of SDValue collecting
    // the eight parts) is elided in this extract.
    SDValue Base = LoadNode->getBasePtr();
    SDValue Chain = LoadNode->getChain();
    EVT PtrVT = Base.getValueType();
    // Emit eight consecutive i64 loads at Base + i*8, threading the chain
    // through each load.
    for (unsigned i = 0; i < 8; i++) {
      SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
                                DAG.getConstant(i * 8, DL, PtrVT));
      SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
                                 LoadNode->getPointerInfo(),
                                 LoadNode->getOriginalAlign());
      Ops.push_back(Part);
      // Value 1 of a load node is its output chain.
      Chain = SDValue(Part.getNode(), 1);
    }
    SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
    return DAG.getMergeValues({Loaded, Chain}, DL);
  }

  // Custom lowering for extending v4i8 vector loads.
  EVT VT = Op->getValueType(0);
  assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");

  if (LoadNode->getMemoryVT() != MVT::v4i8)
    return SDValue();

  // Map the load's extension kind to the vector-extend opcode; anyextload
  // is treated as zero-extend.
  unsigned ExtType;
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
    ExtType = ISD::SIGN_EXTEND;
  else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
           LoadNode->getExtensionType() == ISD::EXTLOAD)
    ExtType = ISD::ZERO_EXTEND;
  else
    return SDValue();

  // Load the four bytes as one f32 scalar, then reinterpret them as v8i8.
  SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
                             LoadNode->getBasePtr(), MachinePointerInfo());
  SDValue Chain = Load.getValue(1);
  // NOTE(review): the line building Vec (inserting the loaded scalar into a
  // vector) is elided in this extract.
  SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
  SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
  // NOTE(review): the line narrowing Ext (extract of the low subvector at
  // index 0) is elided in this extract.
                    DAG.getConstant(0, DL, MVT::i64));
  if (VT == MVT::v4i32)
    Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
  return DAG.getMergeValues({Ext, Chain}, DL);
}
5798 
// Generate SUBS and CSEL for integer abs.
SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();

  // Vector abs is lowered as a predicated merging operation.
  if (VT.isVector())
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);

  SDLoc DL(Op);
  // Neg = 0 - x.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                            Op.getOperand(0));
  // Generate SUBS & CSEL: compare x against 0, then select between x and
  // -x on the resulting flags (Cmp.getValue(1) is NZCV).
  SDValue Cmp =
      DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
                  Op.getOperand(0), DAG.getConstant(0, DL, VT));
  // NOTE(review): the condition-code operand of the CSEL is elided in this
  // extract.
  return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
                     Cmp.getValue(1));
}
5817 
  // NOTE(review): the signature line of this function is elided in this
  // extract; given the AArch64ISD::BRCOND node it emits, this is the
  // conditional-branch lowering.
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Dest = Op.getOperand(2);

  // NOTE(review): the declaration of CC (the condition code written by
  // emitConjunction) is elided in this extract.
  if (SDValue Cmp = emitConjunction(DAG, Cond, CC)) {
    SDLoc dl(Op);
    SDValue CCVal = DAG.getConstant(CC, dl, MVT::i32);
    // Branch on the flags produced by the conjunction.
    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Cmp);
  }

  // Otherwise fall back to the default expansion.
  return SDValue();
}
5833 
                                              SelectionDAG &DAG) const {
  // Central dispatch for every operation marked "Custom" during target
  // lowering setup; each opcode is forwarded to its dedicated Lower*
  // helper.  NOTE(review): the first signature line and a number of
  // `case ISD::...:` labels are elided in this extract; the elisions are
  // flagged inline below.
  LLVM_DEBUG(dbgs() << "Custom lowering: ");
  LLVM_DEBUG(Op.dump());

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operand");
    return SDValue();
  case ISD::BITCAST:
    return LowerBITCAST(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::SETCC:
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS:
    return LowerSETCC(Op, DAG);
  case ISD::SETCCCARRY:
    return LowerSETCCCARRY(Op, DAG);
  case ISD::BRCOND:
    return LowerBRCOND(Op, DAG);
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::SELECT:
    return LowerSELECT(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return LowerVACOPY(Op, DAG);
  case ISD::VAARG:
    return LowerVAARG(Op, DAG);
  // Carry-using add/sub: ADCS/SBCS, with or without signed-overflow result.
  case ISD::ADDCARRY:
    return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, false /*unsigned*/);
  case ISD::SUBCARRY:
    return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, false /*unsigned*/);
  case ISD::SADDO_CARRY:
    return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, true /*signed*/);
  case ISD::SSUBO_CARRY:
    return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, true /*signed*/);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
    return LowerXALUO(Op, DAG);
  // SVE/streaming predicated floating-point arithmetic.
  case ISD::FADD:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
  case ISD::FSUB:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
  case ISD::FMUL:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
  case ISD::FMA:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
  case ISD::FDIV:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
  case ISD::FNEG:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
  case ISD::FCEIL:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
  case ISD::FFLOOR:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
  case ISD::FNEARBYINT:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
  case ISD::FRINT:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
  case ISD::FROUND:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
  case ISD::FROUNDEVEN:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
  case ISD::FTRUNC:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
  case ISD::FSQRT:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
  case ISD::FABS:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
  case ISD::FP_ROUND:
  case ISD::STRICT_FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::FP_EXTEND:
    return LowerFP_EXTEND(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::SPONENTRY:
    return LowerSPONENTRY(Op, DAG);
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::ADDROFRETURNADDR:
    return LowerADDROFRETURNADDR(Op, DAG);
  case ISD::CONCAT_VECTORS:
    return LowerCONCAT_VECTORS(Op, DAG);
  // NOTE(review): the `case ISD::INSERT_VECTOR_ELT:` label is elided here.
    return LowerINSERT_VECTOR_ELT(Op, DAG);
  // NOTE(review): the `case ISD::EXTRACT_VECTOR_ELT:` label is elided here.
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  // NOTE(review): the `case ISD::ZERO_EXTEND_VECTOR_INREG:` label is elided.
    return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SPLAT_VECTOR:
    return LowerSPLAT_VECTOR(Op, DAG);
  // NOTE(review): the `case ISD::EXTRACT_SUBVECTOR:` label is elided here.
    return LowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::INSERT_SUBVECTOR:
    return LowerINSERT_SUBVECTOR(Op, DAG);
  case ISD::SDIV:
  case ISD::UDIV:
    return LowerDIV(Op, DAG);
  case ISD::SMIN:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::UMAX:
    return LowerMinMax(Op, DAG);
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SHL:
    return LowerVectorSRA_SRL_SHL(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRL_PARTS:
  case ISD::SRA_PARTS:
    return LowerShiftParts(Op, DAG);
  case ISD::CTPOP:
  case ISD::PARITY:
    return LowerCTPOP_PARITY(Op, DAG);
  case ISD::FCOPYSIGN:
    return LowerFCOPYSIGN(Op, DAG);
  case ISD::OR:
    return LowerVectorOR(Op, DAG);
  case ISD::XOR:
    return LowerXOR(Op, DAG);
  case ISD::PREFETCH:
    return LowerPREFETCH(Op, DAG);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  // NOTE(review): the STRICT_[SU]INT_TO_FP case labels are elided here.
    return LowerINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  // NOTE(review): the STRICT_FP_TO_[SU]INT case labels are elided here.
    return LowerFP_TO_INT(Op, DAG);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::FSINCOS:
    return LowerFSINCOS(Op, DAG);
  case ISD::GET_ROUNDING:
    return LowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return LowerSET_ROUNDING(Op, DAG);
  case ISD::MUL:
    return LowerMUL(Op, DAG);
  case ISD::MULHS:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED);
  case ISD::MULHU:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED);
  // NOTE(review): the `case ISD::INTRINSIC_W_CHAIN:` label is elided here.
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  // NOTE(review): the `case ISD::INTRINSIC_WO_CHAIN:` label is elided here.
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::ATOMIC_STORE:
    // 128-bit atomic stores become STP/STILP; anything else uses default
    // expansion.
    if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
      assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
      return LowerStore128(Op, DAG);
    }
    return SDValue();
  case ISD::STORE:
    return LowerSTORE(Op, DAG);
  case ISD::MSTORE:
    return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
  case ISD::MGATHER:
    return LowerMGATHER(Op, DAG);
  case ISD::MSCATTER:
    return LowerMSCATTER(Op, DAG);
  // NOTE(review): the `case ISD::VECREDUCE_SEQ_FADD:` label is elided here.
    return LowerVECREDUCE_SEQ_FADD(Op, DAG);
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
    return LowerVECREDUCE(Op, DAG);
  case ISD::ATOMIC_LOAD_SUB:
    return LowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return LowerATOMIC_LOAD_AND(Op, DAG);
  // NOTE(review): the `case ISD::DYNAMIC_STACKALLOC:` label is elided here.
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::VSCALE:
    return LowerVSCALE(Op, DAG);
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
  case ISD::SIGN_EXTEND_INREG: {
    // Only custom lower when ExtraVT has a legal byte based element type.
    EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    EVT ExtraEltVT = ExtraVT.getVectorElementType();
    if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
        (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
      return SDValue();

    // NOTE(review): the predicated-opcode argument of this call is elided
    // in this extract.
    return LowerToPredicatedOp(Op, DAG,
  }
  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
  case ISD::MLOAD:
    return LowerMLOAD(Op, DAG);
  case ISD::LOAD:
    if (useSVEForFixedLengthVectorVT(Op.getValueType(),
                                     Subtarget->forceStreamingCompatibleSVE()))
      return LowerFixedLengthVectorLoadToSVE(Op, DAG);
    return LowerLOAD(Op, DAG);
  case ISD::ADD:
  case ISD::AND:
  case ISD::SUB:
    return LowerToScalableOp(Op, DAG);
  case ISD::FMAXIMUM:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
  case ISD::FMAXNUM:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
  case ISD::FMINIMUM:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
  case ISD::FMINNUM:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
  case ISD::VSELECT:
    return LowerFixedLengthVectorSelectToSVE(Op, DAG);
  case ISD::ABS:
    return LowerABS(Op, DAG);
  case ISD::ABDS:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
  case ISD::ABDU:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
  case ISD::AVGFLOORS:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDS_PRED);
  case ISD::AVGFLOORU:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDU_PRED);
  case ISD::AVGCEILS:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDS_PRED);
  case ISD::AVGCEILU:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDU_PRED);
  case ISD::BITREVERSE:
    return LowerBitreverse(Op, DAG);
  case ISD::BSWAP:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
  case ISD::CTLZ:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
  case ISD::CTTZ:
    return LowerCTTZ(Op, DAG);
  case ISD::VECTOR_SPLICE:
    return LowerVECTOR_SPLICE(Op, DAG);
  case ISD::STRICT_LROUND:
  case ISD::STRICT_LLROUND:
  case ISD::STRICT_LRINT:
  case ISD::STRICT_LLRINT: {
    assert(Op.getOperand(1).getValueType() == MVT::f16 &&
           "Expected custom lowering of rounding operations only for f16");
    SDLoc DL(Op);
    // NOTE(review): the line defining Ext (extending the f16 operand) is
    // partially elided in this extract.
                              {Op.getOperand(0), Op.getOperand(1)});
    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                       {Ext.getValue(1), Ext.getValue(0)});
  }
  case ISD::WRITE_REGISTER: {
    assert(Op.getOperand(2).getValueType() == MVT::i128 &&
           "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
    SDLoc DL(Op);

    SDValue Chain = Op.getOperand(0);
    SDValue SysRegName = Op.getOperand(1);
    SDValue Pair = Op.getOperand(2);

    // Split the i128 value into its two 64-bit halves for MSRR.
    SDValue PairLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Pair,
                                 DAG.getConstant(0, DL, MVT::i32));
    SDValue PairHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Pair,
                                 DAG.getConstant(1, DL, MVT::i32));

    // chain = MSRR(chain, sysregname, lo, hi)
    SDValue Result = DAG.getNode(AArch64ISD::MSRR, DL, MVT::Other, Chain,
                                 SysRegName, PairLo, PairHi);

    return Result;
  }
  }
}
6143 
  // NOTE(review): the enclosing function's signature line is elided in this
  // extract; the visible body returns true only when fixed-length vectors
  // are not being lowered via SVE.
  return !Subtarget->useSVEForFixedLengthVectors();
}
6147 
  // NOTE(review): the enclosing function's signature line is elided in this
  // extract; the visible body unconditionally returns true.
  return true;
}
6151 
                                      EVT VT, bool OverrideNEON) const {
  // Decide whether a fixed-length vector type should be lowered with SVE
  // instructions instead of NEON.  NOTE(review): the first signature line
  // (function name and leading parameters) is elided in this extract.
  if (!VT.isFixedLengthVector() || !VT.isSimple())
    return false;

  // Don't use SVE for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
  // Fixed length predicates should be promoted to i8.
  // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
  case MVT::i1:
  default:
    return false;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64:
    break;
  }

  // All SVE implementations support NEON sized vectors.
  if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
    return Subtarget->hasSVE();

  // Ensure NEON MVTs only belong to a single register class.
  if (VT.getFixedSizeInBits() <= 128)
    return false;

  // Ensure wider than NEON code generation is enabled.
  if (!Subtarget->useSVEForFixedLengthVectors())
    return false;

  // Don't use SVE for types that don't fit.
  if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length SVE support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}
6197 
6198 //===----------------------------------------------------------------------===//
6199 // Calling Convention Implementation
6200 //===----------------------------------------------------------------------===//
6201 
6202 static unsigned getIntrinsicID(const SDNode *N) {
6203  unsigned Opcode = N->getOpcode();
6204  switch (Opcode) {
6205  default:
6206  return Intrinsic::not_intrinsic;
6207  case ISD::INTRINSIC_WO_CHAIN: {
6208  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
6209  if (IID < Intrinsic::num_intrinsics)
6210  return IID;
6211  return Intrinsic::not_intrinsic;
6212  }
6213  }
6214 }
6215 
                                                    SDValue N1) const {
  // NOTE(review): the first signature line is elided in this extract; the
  // visible trailing parameter is N1, the second reassociation candidate.
  // Only reassociate when N0 has a single user, otherwise its value would
  // have to be recomputed.
  if (!N0.hasOneUse())
    return false;

  unsigned IID = getIntrinsicID(N1.getNode());
  // Avoid reassociating expressions that can be lowered to smlal/umlal.
  if (IID == Intrinsic::aarch64_neon_umull ||
      N1.getOpcode() == AArch64ISD::UMULL ||
      IID == Intrinsic::aarch64_neon_smull ||
      N1.getOpcode() == AArch64ISD::SMULL)
    return N0.getOpcode() != ISD::ADD;

  return true;
}
6231 
/// Selects the correct CCAssignFn for a given CallingConvention value.
/// NOTE(review): the first signature line and a number of
/// `case CallingConv::...:` labels are elided in this extract; the
/// elisions are flagged inline below.
                                                     bool IsVarArg) const {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention.");
  // NOTE(review): elided case label (presumably CallingConv::WebKit_JS).
    return CC_AArch64_WebKit_JS;
  case CallingConv::GHC:
    return CC_AArch64_GHC;
  case CallingConv::C:
  case CallingConv::Fast:
  // NOTE(review): elided case labels here.
  case CallingConv::Swift:
  // NOTE(review): elided case label here.
  case CallingConv::Tail:
    // Windows varargs use a dedicated convention; the elided line inside
    // the Arm64EC branch presumably returns the Arm64EC variant.
    if (Subtarget->isTargetWindows() && IsVarArg) {
      if (Subtarget->isWindowsArm64EC())
      return CC_AArch64_Win64_VarArg;
    }
    if (!Subtarget->isTargetDarwin())
      return CC_AArch64_AAPCS;
    if (!IsVarArg)
      return CC_AArch64_DarwinPCS;
    // NOTE(review): the second arm of this ternary (the non-ILP32 Darwin
    // varargs convention) is elided here.
    return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
  case CallingConv::Win64:
    if (IsVarArg) {
      if (Subtarget->isWindowsArm64EC())
      return CC_AArch64_Win64_VarArg;
    }
    return CC_AArch64_AAPCS;
  // NOTE(review): elided case labels here.
    return CC_AArch64_AAPCS;
  }
}
6276 
// NOTE(review): this definition is almost entirely elided in this extract
// (only the return type and closing brace survive); consult the full
// source before editing.
CCAssignFn *
}
6282 
6283 
unsigned
AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
                                              SelectionDAG &DAG) const {
  // Materialise the SME lazy-save buffer: a dynamic stack allocation sized
  // by the SDValue NN below, plus a 16-byte TPIDR2 stack object holding the
  // buffer pointer.  Returns the frame index of the TPIDR2 object; Chain is
  // updated in place.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Allocate a lazy-save buffer object of size SVL.B * SVL.B (worst-case)
  // NOTE(review): the line defining N (presumably reading SVL.B) is
  // partially elided in this extract.
                          DAG.getConstant(1, DL, MVT::i32));
  SDValue NN = DAG.getNode(ISD::MUL, DL, MVT::i64, N, N);
  SDValue Ops[] = {Chain, NN, DAG.getConstant(1, DL, MVT::i64)};
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
  SDValue Buffer = DAG.getNode(ISD::DYNAMIC_STACKALLOC, DL, VTs, Ops);
  Chain = Buffer.getValue(1);
  // Record the variable-sized allocation with frame info.
  MFI.CreateVariableSizedObject(Align(1), nullptr);

  // Allocate an additional TPIDR2 object on the stack (16 bytes)
  unsigned TPIDR2Obj = MFI.CreateStackObject(16, Align(16), false);

  // Store the buffer pointer to the TPIDR2 stack object.
  MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj);
  SDValue Ptr = DAG.getFrameIndex(
      TPIDR2Obj,
  // NOTE(review): the frame-index type argument of this call is elided in
  // this extract.
  Chain = DAG.getStore(Chain, DL, Buffer, Ptr, MPI);

  return TPIDR2Obj;
}
6312 
6313 SDValue AArch64TargetLowering::LowerFormalArguments(
6314  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6315  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
6316  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6317  MachineFunction &MF = DAG.getMachineFunction();
6318  const Function &F = MF.getFunction();
6319  MachineFrameInfo &MFI = MF.getFrameInfo();
6320  bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv());
6322 
6324  GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs,
6325  DAG.getTargetLoweringInfo(), MF.getDataLayout());
6326  if (any_of(Outs, [](ISD::OutputArg &Out){ return Out.VT.isScalableVector(); }))
6327  FuncInfo->setIsSVECC(true);
6328 
6329  // Assign locations to all of the incoming arguments.
6331  DenseMap<unsigned, SDValue> CopiedRegs;
6332  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6333 
6334  // At this point, Ins[].VT may already be promoted to i32. To correctly
6335  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
6336  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
6337  // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
6338  // we use a special version of AnalyzeFormalArguments to pass in ValVT and
6339  // LocVT.
6340  unsigned NumArgs = Ins.size();
6341  Function::const_arg_iterator CurOrigArg = F.arg_begin();
6342  unsigned CurArgIdx = 0;
6343  for (unsigned i = 0; i != NumArgs; ++i) {
6344  MVT ValVT = Ins[i].VT;
6345  if (Ins[i].isOrigArg()) {
6346  std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
6347  CurArgIdx = Ins[i].getOrigArgIndex();
6348 
6349  // Get type of the original argument.
6350  EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
6351  /*AllowUnknown*/ true);
6352  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
6353  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
6354  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
6355  ValVT = MVT::i8;
6356  else if (ActualMVT == MVT::i16)
6357  ValVT = MVT::i16;
6358  }
6359  bool UseVarArgCC = false;
6360  if (IsWin64)
6361  UseVarArgCC = isVarArg;
6362  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
6363  bool Res =
6364  AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
6365  assert(!Res && "Call operand has unhandled type");
6366  (void)Res;
6367  }
6368 
6369  SMEAttrs Attrs(MF.getFunction());
6370  bool IsLocallyStreaming =
6371  !Attrs.hasStreamingInterface() && Attrs.hasStreamingBody();
6372  assert(Chain.getOpcode() == ISD::EntryToken && "Unexpected Chain value");
6373  SDValue Glue = Chain.getValue(1);
6374 
6375  SmallVector<SDValue, 16> ArgValues;
6376  unsigned ExtraArgLocs = 0;
6377  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
6378  CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
6379 
6380  if (Ins[i].Flags.isByVal()) {
6381  // Byval is used for HFAs in the PCS, but the system should work in a
6382  // non-compliant manner for larger structs.
6383  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6384  int Size = Ins[i].Flags.getByValSize();
6385  unsigned NumRegs = (Size + 7) / 8;
6386 
6387  // FIXME: This works on big-endian for composite byvals, which are the common
6388  // case. It should also work for fundamental types too.
6389  unsigned FrameIdx =
6390  MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
6391  SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
6392  InVals.push_back(FrameIdxN);
6393 
6394  continue;
6395  }
6396 
6397  if (Ins[i].Flags.isSwiftAsync())
6399 
6400  SDValue ArgValue;
6401  if (VA.isRegLoc()) {
6402  // Arguments stored in registers.
6403  EVT RegVT = VA.getLocVT();
6404  const TargetRegisterClass *RC;
6405 
6406  if (RegVT == MVT::i32)
6407  RC = &AArch64::GPR32RegClass;
6408  else if (RegVT == MVT::i64)
6409  RC = &AArch64::GPR64RegClass;
6410  else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
6411  RC = &AArch64::FPR16RegClass;
6412  else if (RegVT == MVT::f32)
6413  RC = &AArch64::FPR32RegClass;
6414  else if (RegVT == MVT::f64 || RegVT.is64BitVector())
6415  RC = &AArch64::FPR64RegClass;
6416  else if (RegVT == MVT::f128 || RegVT.is128BitVector())
6417  RC = &AArch64::FPR128RegClass;
6418  else if (RegVT.isScalableVector() &&
6419  RegVT.getVectorElementType() == MVT::i1) {
6420  FuncInfo->setIsSVECC(true);
6421  RC = &AArch64::PPRRegClass;
6422  } else if (RegVT.isScalableVector()) {
6423  FuncInfo->setIsSVECC(true);
6424  RC = &AArch64::ZPRRegClass;
6425  } else
6426  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
6427 
6428  // Transform the arguments in physical registers into virtual ones.
6429  Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
6430 
6431  if (IsLocallyStreaming) {
6432  // LocallyStreamingFunctions must insert the SMSTART in the correct
6433  // position, so we use Glue to ensure no instructions can be scheduled
6434  // between the chain of:
6435  // t0: ch,glue = EntryNode
6436  // t1: res,ch,glue = CopyFromReg
6437  // ...
6438  // tn: res,ch,glue = CopyFromReg t(n-1), ..
6439  // t(n+1): ch, glue = SMSTART t0:0, ...., tn:2
6440  // ^^^^^^
6441  // This will be the new Chain/Root node.
6442  ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT, Glue);
6443  Glue = ArgValue.getValue(2);
6444  } else
6445  ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
6446 
6447  // If this is an 8, 16 or 32-bit value, it is really passed promoted
6448  // to 64 bits. Insert an assert[sz]ext to capture this, then
6449  // truncate to the right size.
6450  switch (VA.getLocInfo()) {
6451  default:
6452  llvm_unreachable("Unknown loc info!");
6453  case CCValAssign::Full:
6454  break;
6455  case CCValAssign::Indirect:
6456  assert((VA.getValVT().isScalableVector() ||
6457  Subtarget->isWindowsArm64EC()) &&
6458  "Indirect arguments should be scalable on most subtargets");
6459  break;
6460  case CCValAssign::BCvt:
6461  ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
6462  break;
6463  case CCValAssign::AExt:
6464  case CCValAssign::SExt:
6465  case CCValAssign::ZExt:
6466  break;
6468  ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
6469  DAG.getConstant(32, DL, RegVT));
6470  ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
6471  break;
6472  }
6473  } else { // VA.isRegLoc()
6474  assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
6475  unsigned ArgOffset = VA.getLocMemOffset();
6476  unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
6477  ? VA.getLocVT().getSizeInBits()
6478  : VA.getValVT().getSizeInBits()) / 8;
6479 
6480  uint32_t BEAlign = 0;
6481  if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
6482  !Ins[i].Flags.isInConsecutiveRegs())
6483  BEAlign = 8 - ArgSize;
6484 
6485  SDValue FIN;
6486  MachinePointerInfo PtrInfo;
6487  if (isVarArg && Subtarget->isWindowsArm64EC()) {
6488  // In the ARM64EC varargs convention, fixed arguments on the stack are
6489  // accessed relative to x4, not sp.
6490  unsigned ObjOffset = ArgOffset + BEAlign;
6491  Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
6492  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
6493  FIN = DAG.getNode(ISD::ADD, DL, MVT::i64, Val,
6494  DAG.getConstant(ObjOffset, DL, MVT::i64));
6496  } else {
6497  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
6498 
6499  // Create load nodes to retrieve arguments from the stack.
6500  FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
6501  PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
6502  }
6503 
6504  // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
6506  MVT MemVT = VA.getValVT();
6507 
6508  switch (VA.getLocInfo()) {
6509  default:
6510  break;
6511  case CCValAssign::Trunc:
6512  case CCValAssign::BCvt:
6513  MemVT = VA.getLocVT();
6514  break;
6515  case CCValAssign::Indirect:
6516  assert((VA.getValVT().isScalableVector() ||
6517  Subtarget->isWindowsArm64EC()) &&
6518  "Indirect arguments should be scalable on most subtargets");
6519  MemVT = VA.getLocVT();
6520  break;
6521  case CCValAssign::SExt:
6522  ExtType = ISD::SEXTLOAD;
6523  break;
6524  case CCValAssign::ZExt:
6525  ExtType = ISD::ZEXTLOAD;
6526  break;
6527  case CCValAssign::AExt:
6528  ExtType = ISD::EXTLOAD;
6529  break;
6530  }
6531 
6532  ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN, PtrInfo,
6533  MemVT);
6534  }
6535 
6536  if (VA.getLocInfo() == CCValAssign::Indirect) {
6537  assert(
6538  (VA.getValVT().isScalableVector() || Subtarget->isWindowsArm64EC()) &&
6539  "Indirect arguments should be scalable on most subtargets");
6540 
6541  uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinValue();
6542  unsigned NumParts = 1;
6543  if (Ins[i].Flags.isInConsecutiveRegs()) {
6544  assert(!Ins[i].Flags.isInConsecutiveRegsLast());
6545  while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
6546  ++NumParts;
6547  }
6548 
6549  MVT PartLoad = VA.getValVT();
6550  SDValue Ptr = ArgValue;
6551 
6552  // Ensure we generate all loads for each tuple part, whilst updating the
6553  // pointer after each load correctly using vscale.
6554  while (NumParts > 0) {
6555  ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
6556  InVals.push_back(ArgValue);
6557  NumParts--;
6558  if (NumParts > 0) {
6559  SDValue BytesIncrement;
6560  if (PartLoad.isScalableVector()) {
6561  BytesIncrement = DAG.getVScale(
6562  DL, Ptr.getValueType(),
6563  APInt(Ptr.getValueSizeInBits().getFixedValue(), PartSize));
6564  } else {
6565  BytesIncrement = DAG.getConstant(
6566  APInt(Ptr.getValueSizeInBits().getFixedValue(), PartSize), DL,
6567  Ptr.getValueType());
6568  }
6570  Flags.setNoUnsignedWrap(true);
6571  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6572  BytesIncrement, Flags);
6573  ExtraArgLocs++;
6574  i++;
6575  }
6576  }
6577  } else {
6578  if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
6579  ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
6580  ArgValue, DAG.getValueType(MVT::i32));
6581 
6582  // i1 arguments are zero-extended to i8 by the caller. Emit a
6583  // hint to reflect this.
6584  if (Ins[i].isOrigArg()) {
6585  Argument *OrigArg = F.getArg(Ins[i].getOrigArgIndex());
6586  if (OrigArg->getType()->isIntegerTy(1)) {
6587  if (!Ins[i].Flags.isZExt()) {
6588  ArgValue = DAG.getNode(AArch64ISD::ASSERT_ZEXT_BOOL, DL,
6589  ArgValue.getValueType(), ArgValue);
6590  }
6591  }
6592  }
6593 
6594  InVals.push_back(ArgValue);
6595  }
6596  }
6597  assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
6598 
6599  // Insert the SMSTART if this is a locally streaming function and
6600  // make sure it is Glued to the last CopyFromReg value.
6601  if (IsLocallyStreaming) {
6602  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6603  Chain = DAG.getNode(
6605  {DAG.getRoot(),
6606  DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32),
6607  DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
6608  DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()), Glue});
6609  // Ensure that the SMSTART happens after the CopyWithChain such that its
6610  // chain result is used.
6611  for (unsigned I=0; I<InVals.size(); ++I) {
6613  getRegClassFor(InVals[I].getValueType().getSimpleVT()));
6614  Chain = DAG.getCopyToReg(Chain, DL, Reg, InVals[I]);
6615  InVals[I] = DAG.getCopyFromReg(Chain, DL, Reg,
6616  InVals[I].getValueType());
6617  }
6618  }
6619 
6620  // varargs
6621  if (isVarArg) {
6622  if (!Subtarget->isTargetDarwin() || IsWin64) {
6623  // The AAPCS variadic function ABI is identical to the non-variadic
6624  // one. As a result there may be more arguments in registers and we should
6625  // save them for future reference.
6626  // Win64 variadic functions also pass arguments in registers, but all float
6627  // arguments are passed in integer registers.
6628  saveVarArgRegisters(CCInfo, DAG, DL, Chain);
6629  }
6630 
6631  // This will point to the next argument passed via stack.
6632  unsigned StackOffset = CCInfo.getNextStackOffset();
6633  // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
6634  StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
6636  FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
6637 
6638  if (MFI.hasMustTailInVarArgFunc()) {
6639  SmallVector<MVT, 2> RegParmTypes;
6640  RegParmTypes.push_back(MVT::i64);
6641  RegParmTypes.push_back(MVT::f128);
6642  // Compute the set of forwarded registers. The rest are scratch.
6644  FuncInfo->getForwardedMustTailRegParms();
6645  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
6647 
6648  // Conservatively forward X8, since it might be used for aggregate return.
6649  if (!CCInfo.isAllocated(AArch64::X8)) {
6650  Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
6651  Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
6652  }
6653  }
6654  }
6655 
6656  // On Windows, InReg pointers must be returned, so record the pointer in a
6657  // virtual register at the start of the function so it can be returned in the
6658  // epilogue.
6659  if (IsWin64) {
6660  for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
6661  if (Ins[I].Flags.isInReg() && Ins[I].Flags.isSRet()) {
6662  assert(!FuncInfo->getSRetReturnReg());
6663 
6664  MVT PtrTy = getPointerTy(DAG.getDataLayout());
6665  Register Reg =
6667  FuncInfo->setSRetReturnReg(Reg);
6668 
6669  SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
6670  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
6671  break;
6672  }
6673  }
6674  }
6675 
6676  unsigned StackArgSize = CCInfo.getNextStackOffset();
6677  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
6678  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
6679  // This is a non-standard ABI so by fiat I say we're allowed to make full
6680  // use of the stack area to be popped, which must be aligned to 16 bytes in
6681  // any case:
6682  StackArgSize = alignTo(StackArgSize, 16);
6683 
6684  // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
6685  // a multiple of 16.
6686  FuncInfo->setArgumentStackToRestore(StackArgSize);
6687 
6688  // This realignment carries over to the available bytes below. Our own
6689  // callers will guarantee the space is free by giving an aligned value to
6690  // CALLSEQ_START.
6691  }
6692  // Even if we're not expected to free up the space, it's useful to know how
6693  // much is there while considering tail calls (because we can reuse it).
6694  FuncInfo->setBytesInStackArgArea(StackArgSize);
6695 
6696  if (Subtarget->hasCustomCallingConv())
6697  Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
6698 
6699  // Conservatively assume the function requires the lazy-save mechanism.
6700  if (SMEAttrs(MF.getFunction()).hasZAState()) {
6701  unsigned TPIDR2Obj = allocateLazySaveBuffer(Chain, DL, DAG);
6702  FuncInfo->setLazySaveTPIDR2Obj(TPIDR2Obj);
6703  }
6704 
6705  return Chain;
6706 }
6707 
void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
                                                SelectionDAG &DAG,
                                                const SDLoc &DL,
                                                SDValue &Chain) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());

  // Chains of the spill stores; merged into Chain via a TokenFactor at the end.
  SmallVector<SDValue, 8> MemOps;

  unsigned NumGPRArgRegs = GPRArgRegs.size();
  if (Subtarget->isWindowsArm64EC()) {
    // In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs
    // functions.
    NumGPRArgRegs = 4;
  }
  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);

  // Each saved GPR occupies an 8-byte slot.
  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
  int GPRIdx = 0;
  if (GPRSaveSize != 0) {
    if (IsWin64) {
      // On Win64 the save area is a fixed object immediately below the
      // incoming stack arguments (hence the negative offset).
      GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
      if (GPRSaveSize & 15)
        // The extra size here, if triggered, will always be 8.
        MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
    } else
      GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);

    SDValue FIN;
    if (Subtarget->isWindowsArm64EC()) {
      // With the Arm64EC ABI, we reserve the save area as usual, but we
      // compute its address relative to x4. For a normal AArch64->AArch64
      // call, x4 == sp on entry, but calls from an entry thunk can pass in a
      // different address.
      Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
      FIN = DAG.getNode(ISD::SUB, DL, MVT::i64, Val,
                        DAG.getConstant(GPRSaveSize, DL, MVT::i64));
    } else {
      FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
    }

    // Store each still-unallocated argument GPR to its slot, bumping the
    // store pointer by 8 bytes per register.
    for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
      Register VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), DL, Val, FIN,
                                         MF, GPRIdx, (i - FirstVariadicGPR) * 8)
                           : MachinePointerInfo::getStack(MF, i * 8));
      MemOps.push_back(Store);
      FIN =
          DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
    }
  }
  // Record where (and how much) was saved so va_start lowering can find it.
  FuncInfo->setVarArgsGPRIndex(GPRIdx);
  FuncInfo->setVarArgsGPRSize(GPRSaveSize);

  if (Subtarget->hasFPARMv8() && !IsWin64) {
    const unsigned NumFPRArgRegs = FPRArgRegs.size();
    unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);

    // FPR saves are whole 128-bit registers, so 16 bytes per slot.
    unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
    int FPRIdx = 0;
    if (FPRSaveSize != 0) {
      FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);

      SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);

      for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
        Register VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
        SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);

        SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
                                     MachinePointerInfo::getStack(MF, i * 16));
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getConstant(16, DL, PtrVT));
      }
    }
    FuncInfo->setVarArgsFPRIndex(FPRIdx);
    FuncInfo->setVarArgsFPRSize(FPRSaveSize);
  }

  // Order everything after the spill stores via a single TokenFactor.
  if (!MemOps.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
  }
}
6801 
6802 /// LowerCallResult - Lower the result values of a call into the
6803 /// appropriate copies out of appropriate physical registers.
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue AArch64TargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<CCValAssign> &RVLocs, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
    SDValue ThisVal) const {
  // Physregs we have already copied out of, so each is copied at most once.
  DenseMap<unsigned, SDValue> CopiedRegs;
  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    // Pass 'this' value directly from the argument to return value, to avoid
    // reg unit interference
    if (i == 0 && isThisReturn) {
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
             "unexpected return calling convention register assignment");
      InVals.push_back(ThisVal);
      continue;
    }

    // Avoid copying a physreg twice since RegAllocFast is incompetent and only
    // allows one use of a physreg per block.
    SDValue Val = CopiedRegs.lookup(VA.getLocReg());
    if (!Val) {
      Val =
          DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
      CopiedRegs[VA.getLocReg()] = Val;
    }

    // Convert the copied value back to the type the caller expects.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
      break;
      Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
                        DAG.getConstant(32, DL, VA.getLocVT()));
      [[fallthrough]];
    case CCValAssign::AExt:
      [[fallthrough]];
    case CCValAssign::ZExt:
      Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
6858 
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
  // fastcc gets guaranteed TCO only when -tailcallopt was requested.
  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
}
6864 
/// Return true if we might ever do TCO for calls with this calling convention.
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Swift:
  case CallingConv::Tail:
  case CallingConv::Fast:
    return true;
  default:
    // Conventions not explicitly listed are never eligible for tail calls.
    return false;
  }
}
6880 
                                const AArch64Subtarget *Subtarget,
                                CCState &CCInfo) {
  const SelectionDAG &DAG = CLI.DAG;
  CallingConv::ID CalleeCC = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);

  // Assign a register or stack location to every outgoing argument,
  // recording the result in CCInfo.
  unsigned NumArgs = Outs.size();
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;

    bool UseVarArgCC = false;
    if (IsVarArg) {
      // On Windows, the fixed arguments in a vararg call are passed in GPRs
      // too, so use the vararg CC to force them to integer registers.
      if (IsCalleeWin64) {
        UseVarArgCC = true;
      } else {
        // Otherwise only the truly variadic (non-fixed) operands use it.
        UseVarArgCC = !Outs[i].IsFixed;
      }
    }

    if (!UseVarArgCC) {
      // Get type of the original argument.
      EVT ActualVT =
          TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty,
                           /*AllowUnknown*/ true);
      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT;
      // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
        ArgVT = MVT::i8;
      else if (ActualMVT == MVT::i16)
        ArgVT = MVT::i16;
    }

    // Run the assignment function; it returns true only on failure, which is
    // a bug, hence the assert.
    CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CalleeCC, UseVarArgCC);
    bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
    assert(!Res && "Call operand has unhandled type");
    (void)Res;
  }
}
6926 
/// Decide whether the call described by \p CLI can be lowered as a tail call
/// without breaking the ABI seen by our own caller. Returns false on any
/// condition that would force a normal call.
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
    const CallLoweringInfo &CLI) const {
  CallingConv::ID CalleeCC = CLI.CallConv;
  if (!mayTailCallThisCC(CalleeCC))
    return false;

  SDValue Callee = CLI.Callee;
  bool IsVarArg = CLI.IsVarArg;
  const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  const SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  const SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  const SelectionDAG &DAG = CLI.DAG;
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // SME Streaming functions are not eligible for TCO as they may require
  // the streaming mode or ZA to be restored after returning from the call.
  SMEAttrs CallerAttrs(MF.getFunction());
  auto CalleeAttrs = CLI.CB ? SMEAttrs(*CLI.CB) : SMEAttrs(SMEAttrs::Normal);
  if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
      CallerAttrs.requiresLazySave(CalleeAttrs))
    return false;

  // Functions using the C or Fast calling convention that have an SVE signature
  // preserve more registers and should assume the SVE_VectorCall CC.
  // The check for matching callee-saved regs will determine whether it is
  // eligible for TCO.
  if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&

  bool CCMatch = CallerCC == CalleeCC;

  // When using the Windows calling convention on a non-windows OS, we want
  // to back up and restore X18 in such functions; we can't do a tail call
  // from those functions.
  if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
      CalleeCC != CallingConv::Win64)
    return false;

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible (see
  // X86) but less efficient and uglier in LowerCall.
  for (Function::const_arg_iterator i = CallerF.arg_begin(),
                                    e = CallerF.arg_end();
       i != e; ++i) {
    if (i->hasByValAttr())
      return false;

    // On Windows, "inreg" attributes signify non-aggregate indirect returns.
    // In this case, it is necessary to save/restore X0 in the callee. Tail
    // call opt interferes with this. So we disable tail call opt when the
    // caller has an argument with "inreg" attribute.

    // FIXME: Check whether the callee also has an "inreg" argument.
    if (i->hasInRegAttr())
      return false;
  }

  // Under guaranteed TCO the only remaining requirement is that the
  // conventions match.
  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
    return CCMatch;

  // Externally-defined functions with weak linkage should not be
  // tail-called on AArch64 when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
      return false;
  }

  // Now we search for cases where we can use a tail call without changing the
  // ABI. Sibcall is used in some places (particularly gcc) to refer to this
  // concept.

  // I want anyone implementing a new calling convention to think long and hard
  // about this assert.
  assert((!IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  LLVMContext &C = *DAG.getContext();
  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  CCAssignFnForCall(CalleeCC, IsVarArg),
                                  CCAssignFnForCall(CallerCC, IsVarArg)))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (Subtarget->hasCustomCallingConv()) {
      TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
      TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
    }
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Nothing more to check if the callee is taking no arguments
  if (Outs.empty())
    return true;

  CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);

  analyzeCallOperands(*this, Subtarget, CLI, CCInfo);

  if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
    // When we are musttail, additional checks have been done and we can safely ignore this check
    // At least two cases here: if caller is fastcc then we can't have any
    // memory arguments (we'd be expected to clean up the stack afterwards). If
    // caller is C then we could potentially use its argument area.

    // FIXME: for now we take the most conservative of these in both cases:
    // disallow all variadic memory operands.
    for (const CCValAssign &ArgLoc : ArgLocs)
      if (!ArgLoc.isRegLoc())
        return false;
  }

  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // If any of the arguments is passed indirectly, it must be SVE, so the
  // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
  // allocate space on the stack. That is why we determine this explicitly here
  // the call cannot be a tailcall.
  if (llvm::any_of(ArgLocs, [&](CCValAssign &A) {
        assert((A.getLocInfo() != CCValAssign::Indirect ||
                A.getValVT().isScalableVector() ||
                Subtarget->isWindowsArm64EC()) &&
               "Expected value to be scalable");
        return A.getLocInfo() == CCValAssign::Indirect;
      }))
    return false;

  // If the stack arguments for this call do not fit into our own save area then
  // the call cannot be made tail.
  if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
    return false;

  // Finally, callee-saved registers holding arguments must contain the same
  // values on both sides of the call.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
    return false;

  return true;
}
7081 
7082 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
7083  SelectionDAG &DAG,
7084  MachineFrameInfo &MFI,
7085  int ClobberedFI) const {
7086  SmallVector<SDValue, 8> ArgChains;
7087  int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
7088  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
7089 
7090  // Include the original chain at the beginning of the list. When this is
7091  // used by target LowerCall hooks, this helps legalize find the
7092  // CALLSEQ_BEGIN node.
7093  ArgChains.push_back(Chain);
7094 
7095  // Add a chain value for each stack argument corresponding
7096  for (SDNode *U : DAG.getEntryNode().getNode()->uses())
7097  if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
7098  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
7099  if (FI->getIndex() < 0) {
7100  int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
7101  int64_t InLastByte = InFirstByte;
7102  InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
7103 
7104  if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
7105  (FirstByte <= InFirstByte && InFirstByte <= LastByte))
7106  ArgChains.push_back(SDValue(L, 1));
7107  }
7108 
7109  // Build a tokenfactor for all the chains.
7110  return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
7111 }
7112 
7113 bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
7114  bool TailCallOpt) const {
7115  return (CallCC == CallingConv::Fast && TailCallOpt) ||
7116  CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
7117 }
7118 
7119 // Check if the value is zero-extended from i1 to i8
7120 static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
7121  unsigned SizeInBits = Arg.getValueType().getSizeInBits();
7122  if (SizeInBits < 8)
7123  return false;
7124 
7125  APInt RequredZero(SizeInBits, 0xFE);
7126  KnownBits Bits = DAG.computeKnownBits(Arg, 4);
7127  bool ZExtBool = (Bits.Zero & RequredZero) == RequredZero;
7128  return ZExtBool;
7129 }
7130 
    SelectionDAG &DAG, SDLoc DL, bool Enable,
    SDValue Chain, SDValue InFlag, SDValue PStateSM, bool Entry) const {
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  // SMSTART/SMSTOP preserve only the registers in this dedicated mask.
  SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());
  // MSR operand selecting the SVCR.SM (streaming-mode) control bit.
  SDValue MSROp =
      DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32);

  // Expected PSTATE.SM value: Enable at function entry, its negation
  // otherwise (i.e. when restoring the previous mode).
  SDValue ExpectedSMVal =
      DAG.getTargetConstant(Entry ? Enable : !Enable, DL, MVT::i64);
  SmallVector<SDValue> Ops = {Chain, MSROp, PStateSM, ExpectedSMVal, RegMask};

  // Thread the incoming glue through if the caller provided one.
  if (InFlag)
    Ops.push_back(InFlag);

  // Emit SMSTART to enable streaming mode, SMSTOP to disable it; the node
  // produces a chain and a glue result.
  unsigned Opcode = Enable ? AArch64ISD::SMSTART : AArch64ISD::SMSTOP;
  return DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
}
7149 
7150 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
7151 /// and add input and output parameter nodes.
7152 SDValue
7153 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
7154  SmallVectorImpl<SDValue> &InVals) const {
7155  SelectionDAG &DAG = CLI.DAG;
7156  SDLoc &DL = CLI.DL;
7157  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
7158  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
7160  SDValue Chain = CLI.Chain;
7161  SDValue Callee = CLI.Callee;
7162  bool &IsTailCall = CLI.IsTailCall;
7163  CallingConv::ID &CallConv = CLI.CallConv;
7164  bool IsVarArg = CLI.IsVarArg;
7165 
7166  MachineFunction &MF = DAG.getMachineFunction();
7168  bool IsThisReturn = false;
7169 
7171  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
7172  bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
7173  bool IsSibCall = false;
7174  bool GuardWithBTI = false;
7175 
7176  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
7177  !Subtarget->noBTIAtReturnTwice()) {
7178  GuardWithBTI = FuncInfo->branchTargetEnforcement();
7179  }
7180 
7181  // Analyze operands of the call, assigning locations to each operand.
7183  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7184 
7185  if (IsVarArg) {
7186  unsigned NumArgs = Outs.size();
7187 
7188  for (unsigned i = 0; i != NumArgs; ++i) {
7189  if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector())
7190  report_fatal_error("Passing SVE types to variadic functions is "
7191  "currently not supported");
7192  }
7193  }
7194 
7195  analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
7196 
7197  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
7198  // Assign locations to each value returned by this call.
7200  CCState RetCCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
7201  *DAG.getContext());
7202  RetCCInfo.AnalyzeCallResult(Ins, RetCC);
7203 
7204  // Check callee args/returns for SVE registers and set calling convention
7205  // accordingly.
7206  if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
7207  auto HasSVERegLoc = [](CCValAssign &Loc) {
7208  if (!Loc.isRegLoc())
7209  return false;
7210  return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
7211  AArch64::PPRRegClass.contains(Loc.getLocReg());
7212  };
7213  if (any_of(RVLocs, HasSVERegLoc) || any_of(ArgLocs, HasSVERegLoc))
7215  }
7216 
7217  if (IsTailCall) {
7218  // Check if it's really possible to do a tail call.
7219  IsTailCall = isEligibleForTailCallOptimization(CLI);
7220 
7221  // A sibling call is one where we're under the usual C ABI and not planning
7222  // to change that but can still do a tail call:
7223  if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
7224  CallConv != CallingConv::SwiftTail)
7225  IsSibCall = true;
7226 
7227  if (IsTailCall)
7228  ++NumTailCalls;
7229  }
7230 
7231  if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
7232  report_fatal_error("failed to perform tail call elimination on a call "
7233  "site marked musttail");
7234 
7235  // Get a count of how many bytes are to be pushed on the stack.
7236  unsigned NumBytes = CCInfo.getNextStackOffset();
7237 
7238  if (IsSibCall) {
7239  // Since we're not changing the ABI to make this a tail call, the memory
7240  // operands are already available in the caller's incoming argument space.
7241  NumBytes = 0;
7242  }
7243 
7244  // FPDiff is the byte offset of the call's argument area from the callee's.
7245  // Stores to callee stack arguments will be placed in FixedStackSlots offset
7246  // by this amount for a tail call. In a sibling call it must be 0 because the
7247  // caller will deallocate the entire stack and the callee still expects its
7248  // arguments to begin at SP+0. Completely unused for non-tail calls.
7249  int FPDiff = 0;
7250 
7251  if (IsTailCall && !IsSibCall) {
7252  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
7253 
7254  // Since callee will pop argument stack as a tail call, we must keep the
7255  // popped size 16-byte aligned.
7256  NumBytes = alignTo(NumBytes, 16);
7257 
7258  // FPDiff will be negative if this tail call requires more space than we
7259  // would automatically have in our incoming argument space. Positive if we
7260  // can actually shrink the stack.
7261  FPDiff = NumReusableBytes - NumBytes;
7262 
7263  // Update the required reserved area if this is the tail call requiring the
7264  // most argument stack space.
7265  if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
7266  FuncInfo->setTailCallReservedStack(-FPDiff);
7267 
7268  // The stack pointer must be 16-byte aligned at all times it's used for a
7269  // memory operation, which in practice means at *all* times and in
7270  // particular across call boundaries. Therefore our own arguments started at
7271  // a 16-byte aligned SP and the delta applied for the tail call should
7272  // satisfy the same constraint.
7273  assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
7274  }
7275 
7276  // Determine whether we need any streaming mode changes.
7277  SMEAttrs CalleeAttrs, CallerAttrs(MF.getFunction());
7278  if (CLI.CB)
7279  CalleeAttrs = SMEAttrs(*CLI.CB);
7280  else if (std::optional<SMEAttrs> Attrs =
7282  CalleeAttrs = *Attrs;
7283 
7284  bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
7285 
7286  MachineFrameInfo &MFI = MF.getFrameInfo();
7287  if (RequiresLazySave) {
7288  // Set up a lazy save mechanism by storing the runtime live slices
7289  // (worst-case N*N) to the TPIDR2 stack object.
7291  DAG.getConstant(1, DL, MVT::i32));
7292  SDValue NN = DAG.getNode(ISD::MUL, DL, MVT::i64, N, N);
7293  unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj();
7294 
7295  MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj);
7296  SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj,
7298  SDValue BufferPtrAddr =
7299  DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr,
7300  DAG.getConstant(8, DL, TPIDR2ObjAddr.getValueType()));
7301  Chain = DAG.getTruncStore(Chain, DL, NN, BufferPtrAddr, MPI, MVT::i16);
7302  Chain = DAG.getNode(
7304  DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
7305  TPIDR2ObjAddr);
7306  }
7307 
7308  SDValue PStateSM;
7309  std::optional<bool> RequiresSMChange =
7310  CallerAttrs.requiresSMChange(CalleeAttrs);
7311  if (RequiresSMChange)
7312  PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64);
7313 
7314  // Adjust the stack pointer for the new arguments...
7315  // These operations are automatically eliminated by the prolog/epilog pass
7316  if (!IsSibCall)
7317  Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
7318 
7319  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
7320  getPointerTy(DAG.getDataLayout()));
7321 
7323  SmallSet<unsigned, 8> RegsUsed;
7324  SmallVector<SDValue, 8> MemOpChains;
7325  auto PtrVT = getPointerTy(DAG.getDataLayout());
7326 
7327  if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
7328  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
7329  for (const auto &F : Forwards) {
7330  SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
7331  RegsToPass.emplace_back(F.PReg, Val);
7332  }
7333  }
7334 
7335  // Walk the register/memloc assignments, inserting copies/loads.
7336  unsigned ExtraArgLocs = 0;
7337  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7338  CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
7339  SDValue Arg = OutVals[i];
7340  ISD::ArgFlagsTy Flags = Outs[i].Flags;
7341 
7342  // Promote the value if needed.
7343  switch (VA.getLocInfo()) {
7344  default:
7345  llvm_unreachable("Unknown loc info!");
7346  case CCValAssign::Full:
7347  break;
7348  case CCValAssign::SExt:
7349  Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
7350  break;
7351  case CCValAssign::ZExt:
7352  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
7353  break;
7354  case CCValAssign::AExt:
7355  if (Outs[i].ArgVT == MVT::i1) {
7356  // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
7357  //
7358  // Check if we actually have to do this, because the value may
7359  // already be zero-extended.
7360  //
7361  // We cannot just emit a (zext i8 (trunc (assert-zext i8)))
7362  // and rely on DAGCombiner to fold this, because the following
7363  // (anyext i32) is combined with (zext i8) in DAG.getNode:
7364  //
7365  // (ext (zext x)) -> (zext x)
7366  //
7367  // This will give us (zext i32), which we cannot remove, so
7368  // try to check this beforehand.
7369  if (!checkZExtBool(Arg, DAG)) {
7370  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
7372  }
7373  }
7374  Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
7375  break;
7377  assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
7378  Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
7379  Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
7380  DAG.getConstant(32, DL, VA.getLocVT()));
7381  break;
7382  case CCValAssign::BCvt:
7383  Arg = DAG.getBitcast(VA.getLocVT(), Arg);
7384  break;
7385  case CCValAssign::Trunc:
7386  Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
7387  break;
7388  case CCValAssign::FPExt:
7389  Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
7390  break;
7391  case CCValAssign::Indirect:
7392  bool isScalable = VA.getValVT().isScalableVector();
7393  assert((isScalable || Subtarget->isWindowsArm64EC()) &&
7394  "Indirect arguments should be scalable on most subtargets");
7395 
7396  uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinValue();
7397  uint64_t PartSize = StoreSize;
7398  unsigned NumParts = 1;
7399  if (Outs[i].Flags.isInConsecutiveRegs()) {
7400  assert(!Outs[i].Flags.isInConsecutiveRegsLast());
7401  while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
7402  ++NumParts;
7403  StoreSize *= NumParts;
7404  }
7405 
7406  Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
7407  Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
7408  int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
7409  if (isScalable)
7411 
7413  SDValue Ptr = DAG.getFrameIndex(
7415  SDValue SpillSlot = Ptr;
7416 
7417  // Ensure we generate all stores for each tuple part, whilst updating the
7418  // pointer after each store correctly using vscale.
7419  while (NumParts) {
7420  Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
7421  NumParts--;
7422  if (NumParts > 0) {
7423  SDValue BytesIncrement;
7424  if (isScalable) {
7425  BytesIncrement = DAG.getVScale(
7426  DL, Ptr.getValueType(),
7427  APInt(Ptr.getValueSizeInBits().getFixedValue(), PartSize));
7428  } else {
7429  BytesIncrement = DAG.getConstant(
7430  APInt(Ptr.getValueSizeInBits().getFixedValue(), PartSize), DL,
7431  Ptr.getValueType());
7432  }
7434  Flags.setNoUnsignedWrap(true);
7435 
7436  MPI = MachinePointerInfo(MPI.getAddrSpace());
7437  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
7438  BytesIncrement, Flags);
7439  ExtraArgLocs++;
7440  i++;
7441  }
7442  }
7443 
7444  Arg = SpillSlot;
7445  break;
7446  }
7447 
7448  if (VA.isRegLoc()) {
7449  if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
7450  Outs[0].VT == MVT::i64) {
7451  assert(VA.getLocVT() == MVT::i64 &&
7452  "unexpected calling convention register assignment");
7453  assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
7454  "unexpected use of 'returned'");
7455  IsThisReturn = true;
7456  }
7457  if (RegsUsed.count(VA.getLocReg())) {
7458  // If this register has already been used then we're trying to pack
7459  // parts of an [N x i32] into an X-register. The extension type will
7460  // take care of putting the two halves in the right place but we have to
7461  // combine them.
7462  SDValue &Bits =
7463  llvm::find_if(RegsToPass,
7464  [=](const std::pair<unsigned, SDValue> &Elt) {
7465  return Elt.first == VA.getLocReg();
7466  })
7467  ->second;
7468  Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
7469  // Call site info is used for function's parameter entry value
7470  // tracking. For now we track only simple cases when parameter
7471  // is transferred through whole register.
7472  llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
7473  return ArgReg.Reg == VA.getLocReg();
7474  });
7475  } else {
7476  // Add an extra level of indirection for streaming mode changes by
7477  // using a pseudo copy node that cannot be rematerialised between a
7478  // smstart/smstop and the call by the simple register coalescer.
7479  if (RequiresSMChange && isa<FrameIndexSDNode>(Arg))
7481  RegsToPass.emplace_back(VA.getLocReg(), Arg);
7482  RegsUsed.insert(VA.getLocReg());
7483  const TargetOptions &Options = DAG.getTarget().Options;
7484  if (Options.EmitCallSiteInfo)
7485  CSInfo.emplace_back(VA.getLocReg(), i);
7486  }
7487  } else {
7488  assert(VA.isMemLoc());
7489 
7490  SDValue DstAddr;
7491  MachinePointerInfo DstInfo;
7492 
7493  // FIXME: This works on big-endian for composite byvals, which are the
7494  // common case. It should also work for fundamental types too.
7495  uint32_t BEAlign = 0;
7496  unsigned OpSize;
7497  if (VA.getLocInfo() == CCValAssign::Indirect ||
7499  OpSize = VA.getLocVT().getFixedSizeInBits();
7500  else
7501  OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
7502  : VA.getValVT().getSizeInBits();
7503  OpSize = (OpSize + 7) / 8;
7504  if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
7505  !Flags.isInConsecutiveRegs()) {
7506  if (OpSize < 8)
7507  BEAlign = 8 - OpSize;
7508  }
7509  unsigned LocMemOffset = VA.getLocMemOffset();
7510  int32_t Offset = LocMemOffset + BEAlign;
7511  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
7512  PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
7513 
7514  if (IsTailCall) {
7515  Offset = Offset + FPDiff;
7516  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
7517 
7518  DstAddr = DAG.getFrameIndex(FI, PtrVT);
7519  DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
7520 
7521  // Make sure any stack arguments overlapping with where we're storing
7522  // are loaded before this eventual operation. Otherwise they'll be
7523  // clobbered.
7524  Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
7525  } else {
7526  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
7527 
7528  DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
7529  DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
7530  }
7531 
7532  if (Outs[i].Flags.isByVal()) {
7533  SDValue SizeNode =
7534  DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
7535  SDValue Cpy = DAG.getMemcpy(
7536  Chain, DL, DstAddr, Arg, SizeNode,
7537  Outs[i].Flags.getNonZeroByValAlign(),
7538  /*isVol = */ false, /*AlwaysInline = */ false,
7539  /*isTailCall = */ false, DstInfo, MachinePointerInfo());
7540 
7541  MemOpChains.push_back(Cpy);
7542  } else {
7543  // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
7544  // promoted to a legal register type i32, we should truncate Arg back to
7545  // i1/i8/i16.
7546  if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
7547  VA.getValVT() == MVT::i16)
7548  Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
7549 
7550  SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
7551  MemOpChains.push_back(Store);
7552  }
7553  }
7554  }
7555 
7556  if (IsVarArg && Subtarget->isWindowsArm64EC()) {
7557  // For vararg calls, the Arm64EC ABI requires values in x4 and x5
7558  // describing the argument list. x4 contains the address of the
7559  // first stack parameter. x5 contains the size in bytes of all parameters
7560  // passed on the stack.
7561  RegsToPass.emplace_back(AArch64::X4, StackPtr);
7562  RegsToPass.emplace_back(AArch64::X5,
7563  DAG.getConstant(NumBytes, DL, MVT::i64));
7564  }
7565 
7566  if (!MemOpChains.empty())
7567  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
7568 
7569  SDValue InFlag;
7570  if (RequiresSMChange) {
7571  SDValue NewChain = changeStreamingMode(DAG, DL, *RequiresSMChange, Chain,
7572  InFlag, PStateSM, true);
7573  Chain = NewChain.getValue(0);
7574  InFlag = NewChain.getValue(1);
7575  }
7576 
7577  // Build a sequence of copy-to-reg nodes chained together with token chain
7578  // and flag operands which copy the outgoing args into the appropriate regs.
7579  for (auto &RegToPass : RegsToPass) {
7580  Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
7581  RegToPass.second, InFlag);
7582  InFlag = Chain.getValue(1);
7583  }
7584 
7585  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
7586  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
7587  // node so that legalize doesn't hack it.
7588  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
7589  auto GV = G->getGlobal();
7590  unsigned OpFlags =
7592  if (OpFlags & AArch64II::MO_GOT) {
7593  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
7594  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
7595  } else {
7596  const GlobalValue *GV = G->getGlobal();
7597  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
7598  }
7599  } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
7601  Subtarget->isTargetMachO()) {
7602  const char *Sym = S->getSymbol();
7604  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
7605  } else {
7606  const char *Sym = S->getSymbol();
7607  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
7608  }
7609  }
7610 
7611  // We don't usually want to end the call-sequence here because we would tidy
7612  // the frame up *after* the call, however in the ABI-changing tail-call case
7613  // we've carefully laid out the parameters so that when sp is reset they'll be
7614  // in the correct location.
7615  if (IsTailCall && !IsSibCall) {
7616  Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, DL);
7617  InFlag = Chain.getValue(1);
7618  }
7619 
7620  std::vector<SDValue> Ops;
7621  Ops.push_back(Chain);
7622  Ops.push_back(Callee);
7623 
7624  if (IsTailCall) {
7625  // Each tail call may have to adjust the stack by a different amount, so
7626  // this information must travel along with the operation for eventual
7627  // consumption by emitEpilogue.
7628  Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
7629  }
7630 
7631  // Add argument registers to the end of the list so that they are known live
7632  // into the call.
7633  for (auto &RegToPass : RegsToPass)
7634  Ops.push_back(DAG.getRegister(RegToPass.first,
7635  RegToPass.second.getValueType()));
7636 
7637  // Add a register mask operand representing the call-preserved registers.
7638  const uint32_t *Mask;
7639  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
7640  if (IsThisReturn) {
7641  // For 'this' returns, use the X0-preserving mask if applicable
7642  Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
7643  if (!Mask) {
7644  IsThisReturn = false;
7645  Mask = TRI->getCallPreservedMask(MF, CallConv);
7646  }
7647  } else
7648  Mask = TRI->getCallPreservedMask(MF, CallConv);
7649 
7650  if (Subtarget->hasCustomCallingConv())
7651  TRI->UpdateCustomCallPreservedMask(MF, &Mask);
7652 
7653  if (TRI->isAnyArgRegReserved(MF))
7654  TRI->emitReservedArgRegCallError(MF);
7655 
7656  assert(Mask && "Missing call preserved mask for calling convention");
7657  Ops.push_back(DAG.getRegisterMask(Mask));
7658 
7659  if (InFlag.getNode())
7660  Ops.push_back(InFlag);
7661 
7662  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7663 
  // If we're doing a tail call, use a TC_RETURN here rather than an
  // actual call instruction.
7666  if (IsTailCall) {
7668  SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
7669 
7670  if (IsCFICall)
7671  Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
7672 
7673  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
7674  return Ret;
7675  }
7676 
7677  unsigned CallOpc = AArch64ISD::CALL;
7678  // Calls with operand bundle "clang.arc.attachedcall" are special. They should
7679  // be expanded to the call, directly followed by a special marker sequence and
7680  // a call to an ObjC library function. Use CALL_RVMARKER to do that.
7681  if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
7682  assert(!IsTailCall &&
7683  "tail calls cannot be marked with clang.arc.attachedcall");
7684  CallOpc = AArch64ISD::CALL_RVMARKER;
7685 
7686  // Add a target global address for the retainRV/claimRV runtime function
7687  // just before the call target.
7688  Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
7689  auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
7690  Ops.insert(Ops.begin() + 1, GA);
7691  } else if (GuardWithBTI)
7692  CallOpc = AArch64ISD::CALL_BTI;
7693 
7694  // Returns a chain and a flag for retval copy to use.
7695  Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
7696 
7697  if (IsCFICall)
7698  Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
7699 
7700  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
7701  InFlag = Chain.getValue(1);
7702  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
7703 
7704  uint64_t CalleePopBytes =
7705  DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
7706 
7707  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, DL);
7708  InFlag = Chain.getValue(1);
7709 
7710  // Handle result values, copying them out of physregs into vregs that we
7711  // return.
7712  SDValue Result = LowerCallResult(Chain, InFlag, CallConv, IsVarArg, RVLocs,
7713  DL, DAG, InVals, IsThisReturn,
7714  IsThisReturn ? OutVals[0] : SDValue());
7715 
7716  if (!Ins.empty())
7717  InFlag = Result.getValue(Result->getNumValues() - 1);
7718 
7719  if (RequiresSMChange) {
7720  assert(PStateSM && "Expected a PStateSM to be set");
7721  Result = changeStreamingMode(DAG, DL, !*RequiresSMChange, Result, InFlag,
7722  PStateSM, false);
7723  }
7724 
7725  if (RequiresLazySave) {
7726  // Unconditionally resume ZA.
7727  Result = DAG.getNode(
7728  AArch64ISD::SMSTART, DL, MVT::Other, Result,
7729  DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
7730  DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
7731 
7732  // Conditionally restore the lazy save using a pseudo node.
7733  unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
7734  SDValue RegMask = DAG.getRegisterMask(
7735  TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
7736  SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
7737  "__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout()));
7738  SDValue TPIDR2_EL0 = DAG.getNode(
7740  DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
7741 
7742  // Copy the address of the TPIDR2 block into X0 before 'calling' the
7743  // RESTORE_ZA pseudo.
7744  SDValue Glue;
7745  SDValue TPIDR2Block = DAG.getFrameIndex(
7747  Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
7749  {Result, TPIDR2_EL0,
7750  DAG.getRegister(AArch64::X0, MVT::i64),
7751  RestoreRoutine,
7752  RegMask,
7753  Result.getValue(1)});
7754 
7755  // Finally reset the TPIDR2_EL0 register to 0.
7756  Result = DAG.getNode(
7757  ISD::INTRINSIC_VOID, DL, MVT::Other, Result,
7758  DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
7759  DAG.getConstant(0, DL, MVT::i64));
7760  }
7761 
7762  if (RequiresSMChange || RequiresLazySave) {
7763  for (unsigned I = 0; I < InVals.size(); ++I) {
7764  // The smstart/smstop is chained as part of the call, but when the
7765  // resulting chain is discarded (which happens when the call is not part
7766  // of a chain, e.g. a call to @llvm.cos()), we need to ensure the
7767  // smstart/smstop is chained to the result value. We can do that by doing
7768  // a vreg -> vreg copy.
7770  getRegClassFor(InVals[I].getValueType().getSimpleVT()));
7771  SDValue X = DAG.getCopyToReg(Result, DL, Reg, InVals[I]);
7772  InVals[I] = DAG.getCopyFromReg(X, DL, Reg,
7773  InVals[I].getValueType());
7774  }
7775  }
7776 
7777  return Result;
7778 }
7779 
7780 bool AArch64TargetLowering::CanLowerReturn(
7781  CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
7782  const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
7783  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
7785  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7786  return CCInfo.CheckReturn(Outs, RetCC);
7787 }
7788 
7789 SDValue
7790 AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7791  bool isVarArg,
7792  const SmallVectorImpl<ISD::OutputArg> &Outs,
7793  const SmallVectorImpl<SDValue> &OutVals,
7794  const SDLoc &DL, SelectionDAG &DAG) const {
7795  auto &MF = DAG.getMachineFunction();
7796  auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
7797 
7798  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
7800  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
7801  CCInfo.AnalyzeReturn(Outs, RetCC);
7802 
7803  // Copy the result values into the output registers.
7804  SDValue Flag;
7806  SmallSet<unsigned, 4> RegsUsed;
7807  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
7808  ++i, ++realRVLocIdx) {
7809  CCValAssign &VA = RVLocs[i];
7810  assert(VA.isRegLoc() && "Can only return in registers!");
7811  SDValue Arg = OutVals[realRVLocIdx];
7812 
7813  switch (VA.getLocInfo()) {
7814  default:
7815  llvm_unreachable("Unknown loc info!");
7816  case CCValAssign::Full:
7817  if (Outs[i].ArgVT == MVT::i1) {
7818  // AAPCS requires i1 to be zero-extended to i8 by the producer of the
7819  // value. This is strictly redundant on Darwin (which uses "zeroext
7820  // i1"), but will be optimised out before ISel.
7821  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
7822  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
7823  }
7824  break;
7825  case CCValAssign::BCvt:
7826  Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
7827  break;
7828  case CCValAssign::AExt:
7829  case CCValAssign::ZExt:
7830  Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
7831  break;
7833  assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
7834  Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
7835  Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
7836  DAG.getConstant(32, DL, VA.getLocVT()));
7837  break;
7838  }
7839 
7840  if (RegsUsed.count(VA.getLocReg())) {
7841  SDValue &Bits =
7842  llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
7843  return Elt.first == VA.getLocReg();
7844  })->second;
7845  Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
7846  } else {
7847  RetVals.emplace_back(VA.getLocReg(), Arg);
7848  RegsUsed.insert(VA.getLocReg());
7849  }
7850  }
7851 
7852  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
7853 
7854  // Emit SMSTOP before returning from a locally streaming function
7855  SMEAttrs FuncAttrs(MF.getFunction());
7856  if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
7857  Chain = DAG.getNode(
7859  DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32),
7860  DAG.getConstant(1, DL, MVT::i64), DAG.getConstant(0, DL, MVT::i64),
7861  DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()));
7862  Flag = Chain.getValue(1);
7863  }
7864 
7865  SmallVector<SDValue, 4> RetOps(1, Chain);
7866  for (auto &RetVal : RetVals) {
7867  Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
7868  Flag = Chain.getValue(1);
7869  RetOps.push_back(
7870  DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
7871  }
7872 
7873  // Windows AArch64 ABIs require that for returning structs by value we copy
7874  // the sret argument into X0 for the return.
7875  // We saved the argument into a virtual register in the entry block,
7876  // so now we copy the value out and into X0.
7877  if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
7878  SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
7879  getPointerTy(MF.getDataLayout()));
7880 
7881  unsigned RetValReg = AArch64::X0;
7882  Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
7883  Flag = Chain.getValue(1);
7884 
7885  RetOps.push_back(
7886  DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
7887  }
7888 
7889  const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
7890  if (I) {
7891  for (; *I; ++I) {
7892  if (AArch64::GPR64RegClass.contains(*I))
7893  RetOps.push_back(DAG.getRegister(*I, MVT::i64));
7894  else if (AArch64::FPR64RegClass.contains(*I))
7895  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
7896  else
7897  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
7898  }
7899  }
7900 
7901  RetOps[0] = Chain; // Update chain.
7902 
7903  // Add the flag if we have it.
7904  if (Flag.getNode())
7905  RetOps.push_back(Flag);
7906 
7907  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
7908 }
7909 
7910 //===----------------------------------------------------------------------===//
7911 // Other Lowering Code
7912 //===----------------------------------------------------------------------===//
7913 
7914 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
7915  SelectionDAG &DAG,
7916  unsigned Flag) const {
7917  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
7918  N->getOffset(), Flag);
7919 }
7920 
7921 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
7922  SelectionDAG &DAG,
7923  unsigned Flag) const {
7924  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
7925 }
7926 
7927 SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
7928  SelectionDAG &DAG,
7929  unsigned Flag) const {
7930  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7931  N->getOffset(), Flag);
7932 }
7933 
7934 SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
7935  SelectionDAG &DAG,
7936  unsigned Flag) const {
7937  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
7938 }
7939 
7940 // (loadGOT sym)
7941 template <class NodeTy>
7942 SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
7943  unsigned Flags) const {
7944  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
7945  SDLoc DL(N);
7946  EVT Ty = getPointerTy(DAG.getDataLayout());
7947  SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
7948  // FIXME: Once remat is capable of dealing with instructions with register
7949  // operands, expand this into two nodes instead of using a wrapper node.
7950  return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
7951 }
7952 
7953 // (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
7954 template <class NodeTy>
7955 SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
7956  unsigned Flags) const {
7957  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
7958  SDLoc DL(N);
7959  EVT Ty = getPointerTy(DAG.getDataLayout());
7960  const unsigned char MO_NC = AArch64II::MO_NC;
7961  return DAG.getNode(
7963  getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
7964  getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
7965  getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
7966  getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
7967 }
7968 
7969 // (addlow (adrp %hi(sym)) %lo(sym))
7970 template <class NodeTy>
7971 SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7972  unsigned Flags) const {
7973  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
7974  SDLoc DL(N);
7975  EVT Ty = getPointerTy(DAG.getDataLayout());
7976  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
7977  SDValue Lo = getTargetNode(N, Ty, DAG,
7979  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
7980  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
7981 }
7982 
7983 // (adr sym)
7984 template <class NodeTy>
7985 SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
7986  unsigned Flags) const {
7987  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
7988  SDLoc DL(N);
7989  EVT Ty = getPointerTy(DAG.getDataLayout());
7990  SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
7991  return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
7992 }
7993 
7994 SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
7995  SelectionDAG &DAG) const {
7996  GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
7997  const GlobalValue *GV = GN->getGlobal();
7998  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
7999 
8000  if (OpFlags != AArch64II::MO_NO_FLAG)
8001  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
8002  "unexpected offset in global node");
8003 
8004  // This also catches the large code model case for Darwin, and tiny code
8005  // model with got relocations.
8006  if ((OpFlags & AArch64II::MO_GOT) != 0) {
8007  return getGOT(GN, DAG, OpFlags);
8008  }
8009 
8010  SDValue Result;
8012  Result = getAddrLarge(GN, DAG, OpFlags);
8013  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
8014  Result = getAddrTiny(GN, DAG, OpFlags);
8015  } else {
8016  Result = getAddr(GN, DAG, OpFlags);
8017  }
8018  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8019  SDLoc DL(GN);
8022  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
8024  return Result;
8025 }
8026 
8027 /// Convert a TLS address reference into the correct sequence of loads
8028 /// and calls to compute the variable's address (for Darwin, currently) and
8029 /// return an SDValue containing the final node.
8030 
8031 /// Darwin only has one TLS scheme which must be capable of dealing with the
8032 /// fully general situation, in the worst case. This means:
8033 /// + "extern __thread" declaration.
8034 /// + Defined in a possibly unknown dynamic library.
8035 ///
8036 /// The general system is that each __thread variable has a [3 x i64] descriptor
8037 /// which contains information used by the runtime to calculate the address. The
8038 /// only part of this the compiler needs to know about is the first xword, which
8039 /// contains a function pointer that must be called with the address of the
8040 /// entire descriptor in "x0".
8041 ///
8042 /// Since this descriptor may be in a different unit, in general even the
8043 /// descriptor must be accessed via an indirect load. The "ideal" code sequence
8044 /// is:
8045 /// adrp x0, _var@TLVPPAGE
8046 /// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
8047 /// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
8048 /// ; the function pointer
8049 /// blr x1 ; Uses descriptor address in x0
8050 /// ; Address of _var is now in x0.
8051 ///
8052 /// If the address of _var's descriptor *is* known to the linker, then it can
8053 /// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
8054 /// a slight efficiency gain.
8055 SDValue
8056 AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
8057  SelectionDAG &DAG) const {
8058  assert(Subtarget->isTargetDarwin() &&
8059  "This function expects a Darwin target");
8060 
8061  SDLoc DL(Op);
8062  MVT PtrVT = getPointerTy(DAG.getDataLayout());
8063  MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
8064  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
8065 
8066  SDValue TLVPAddr =
8067  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
8068  SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
8069 
8070  // The first entry in the descriptor is a function pointer that we must call
8071  // to obtain the address of the variable.
8072  SDValue Chain = DAG.getEntryNode();
8073  SDValue FuncTLVGet = DAG.getLoad(
8074  PtrMemVT, DL, Chain, DescAddr,
8076  Align(PtrMemVT.getSizeInBits() / 8),
8078  Chain = FuncTLVGet.getValue(1);
8079 
8080  // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
8081  FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
8082 
8084  MFI.setAdjustsStack(true);
8085 
8086  // TLS calls preserve all registers except those that absolutely must be
8087  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
8088  // silly).
8089  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
8090  const uint32_t *Mask = TRI->getTLSCallPreservedMask();
8091  if (Subtarget->hasCustomCallingConv())
8092  TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
8093 
8094  // Finally, we can make the call. This is just a degenerate version of a
8095  // normal AArch64 call node: x0 takes the address of the descriptor, and
8096  // returns the address of the variable in this thread.
8097  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
8098  Chain =
8100  Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
8101  DAG.getRegisterMask(Mask), Chain.getValue(1));
8102  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
8103 }
8104 
8105 /// Convert a thread-local variable reference into a sequence of instructions to
8106 /// compute the variable's address for the local exec TLS model of ELF targets.
8107 /// The sequence depends on the maximum TLS area size.
8108 SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
8109  SDValue ThreadBase,
8110  const SDLoc &DL,
8111  SelectionDAG &DAG) const {
8112  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8113  SDValue TPOff, Addr;
8114 
8115  switch (DAG.getTarget().Options.TLSSize) {
8116  default:
8117  llvm_unreachable("Unexpected TLS size");
8118 
8119  case 12: {
8120  // mrs x0, TPIDR_EL0
8121  // add x0, x0, :tprel_lo12:a
8122  SDValue Var = DAG.getTargetGlobalAddress(
8123  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
8124  return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
8125  Var,
8126  DAG.getTargetConstant(0, DL, MVT::i32)),
8127  0);
8128  }
8129 
8130  case 24: {
8131  // mrs x0, TPIDR_EL0
8132  // add x0, x0, :tprel_hi12:a
8133  // add x0, x0, :tprel_lo12_nc:a
8134  SDValue HiVar = DAG.getTargetGlobalAddress(
8135  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
8136  SDValue LoVar = DAG.getTargetGlobalAddress(
8137  GV, DL, PtrVT, 0,
8139  Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
8140  HiVar,
8141  DAG.getTargetConstant(0, DL, MVT::i32)),
8142  0);
8143  return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
8144  LoVar,
8145  DAG.getTargetConstant(0, DL, MVT::i32)),
8146  0);
8147  }
8148 
8149  case 32: {
8150  // mrs x1, TPIDR_EL0
8151  // movz x0, #:tprel_g1:a
8152  // movk x0, #:tprel_g0_nc:a
8153  // add x0, x1, x0
8154  SDValue HiVar = DAG.getTargetGlobalAddress(
8155  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
8156  SDValue LoVar = DAG.getTargetGlobalAddress(
8157  GV, DL, PtrVT, 0,
8159  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
8160  DAG.getTargetConstant(16, DL, MVT::i32)),
8161  0);
8162  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
8163  DAG.getTargetConstant(0, DL, MVT::i32)),
8164  0);
8165  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
8166  }
8167 
8168  case 48: {
8169  // mrs x1, TPIDR_EL0
8170  // movz x0, #:tprel_g2:a
8171  // movk x0, #:tprel_g1_nc:a
8172  // movk x0, #:tprel_g0_nc:a
8173  // add x0, x1, x0
8174  SDValue HiVar = DAG.getTargetGlobalAddress(
8175  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
8176  SDValue MiVar = DAG.getTargetGlobalAddress(
8177  GV, DL, PtrVT, 0,
8179  SDValue LoVar = DAG.getTargetGlobalAddress(
8180  GV, DL, PtrVT, 0,
8182  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
8183  DAG.getTargetConstant(32, DL, MVT::i32)),
8184  0);
8185  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
8186  DAG.getTargetConstant(16, DL, MVT::i32)),
8187  0);
8188  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
8189  DAG.getTargetConstant(0, DL, MVT::i32)),
8190  0);
8191  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
8192  }
8193  }
8194 }
8195 
8196 /// When accessing thread-local variables under either the general-dynamic or
8197 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
8198 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
8199 /// is a function pointer to carry out the resolution.
8200 ///
8201 /// The sequence is:
8202 /// adrp x0, :tlsdesc:var
8203 /// ldr x1, [x0, #:tlsdesc_lo12:var]
8204 /// add x0, x0, #:tlsdesc_lo12:var
8205 /// .tlsdesccall var
8206 /// blr x1
8207 /// (TPIDR_EL0 offset now in x0)
8208 ///
8209 /// The above sequence must be produced unscheduled, to enable the linker to
8210 /// optimize/relax this sequence.
8211 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
8212 /// above sequence, and expanded really late in the compilation flow, to ensure
8213 /// the sequence is produced as per above.
8214 SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
8215  const SDLoc &DL,
8216  SelectionDAG &DAG) const {
8217  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8218 
8219  SDValue Chain = DAG.getEntryNode();
8220  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8221 
8222  Chain =
8223  DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
8224  SDValue Glue = Chain.getValue(1);
8225 
8226  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
8227 }
8228 
8229 SDValue
8230 AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
8231  SelectionDAG &DAG) const {
8232  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
8233 
8234  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
8235 
8237 
8241  }
8242 
8245  report_fatal_error("ELF TLS only supported in small memory model or "
8246  "in local exec TLS model");
8247  // Different choices can be made for the maximum size of the TLS area for a
8248  // module. For the small address model, the default TLS size is 16MiB and the
8249  // maximum TLS size is 4GiB.
8250  // FIXME: add tiny and large code model support for TLS access models other
8251  // than local exec. We currently generate the same code as small for tiny,
8252  // which may be larger than needed.
8253 
8254  SDValue TPOff;
8255  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8256  SDLoc DL(Op);
8257  const GlobalValue *GV = GA->getGlobal();
8258 
8259  SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
8260 
8261  if (Model == TLSModel::LocalExec) {
8262  return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
8263  } else if (Model == TLSModel::InitialExec) {
8264  TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
8265  TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
8266  } else if (Model == TLSModel::LocalDynamic) {
8267  // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
8268  // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
8269  // the beginning of the module's TLS region, followed by a DTPREL offset
8270  // calculation.
8271 
8272  // These accesses will need deduplicating if there's more than one.
8273  AArch64FunctionInfo *MFI =
8276 
8277  // The call needs a relocation too for linker relaxation. It doesn't make
8278  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
8279  // the address.
8280  SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
8282 
8283  // Now we can calculate the offset from TPIDR_EL0 to this module's
8284  // thread-local area.
8285  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
8286 
8287  // Now use :dtprel_whatever: operations to calculate this variable's offset
8288  // in its thread-storage area.
8289  SDValue HiVar = DAG.getTargetGlobalAddress(
8291  SDValue LoVar = DAG.getTargetGlobalAddress(
8292  GV, DL, MVT::i64, 0,
8294 
8295  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
8296  DAG.getTargetConstant(0, DL, MVT::i32)),
8297  0);
8298  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
8299  DAG.getTargetConstant(0, DL, MVT::i32)),
8300  0);
8301  } else if (Model == TLSModel::GeneralDynamic) {
8302  // The call needs a relocation too for linker relaxation. It doesn't make
8303  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
8304  // the address.
8305  SDValue SymAddr =
8306  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
8307 
8308  // Finally we can make a call to calculate the offset from tpidr_el0.
8309  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
8310  } else
8311  llvm_unreachable("Unsupported ELF TLS access model");
8312 
8313  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
8314 }
8315 
// Lower a thread-local global-variable address for Windows targets.
//
// Emits the standard Windows TLS access sequence: read the TLS-array pointer
// out of the TEB (offset 0x58), index it with the module's runtime-assigned
// `_tls_index` (scaled by 8) to find this module's TLS block, then add the
// variable's link-time offset within the .tls section.
SDValue
AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");

  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);

  // x18 holds the TEB pointer on Windows.
  SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);

  // Load the ThreadLocalStoragePointer from the TEB
  // A pointer to the TLS array is located at offset 0x58 from the TEB.
  SDValue TLSArray =
      DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
  Chain = TLSArray.getValue(1);

  // Load the TLS index from the C runtime;
  // This does the same as getAddr(), but without having a GlobalAddressSDNode.
  // This also does the same as LOADgot, but using a generic i32 load,
  // while LOADgot only loads i64.
  SDValue TLSIndexHi =
      DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
  SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
      "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  // Materialize the address of _tls_index with the usual ADRP + ADDlow pair.
  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
  SDValue TLSIndex =
      DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
  TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
  Chain = TLSIndex.getValue(1);

  // The pointer to the thread's TLS data area is at the TLS Index scaled by 8
  // offset into the TLSArray.
  TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
                             DAG.getConstant(3, DL, PtrVT));
  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
                            DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
                            MachinePointerInfo());
  Chain = TLS.getValue(1);

  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GA->getGlobal();
  // Hi12/Lo halves of the variable's offset within the .tls section.
  SDValue TGAHi = DAG.getTargetGlobalAddress(
      GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
  SDValue TGALo = DAG.getTargetGlobalAddress(
      GV, DL, PtrVT, 0,

  // Add the offset from the start of the .tls section (section base).
  SDValue Addr =
      SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
                                 DAG.getTargetConstant(0, DL, MVT::i32)),
              0);
  Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
  return Addr;
}
8374 
8375 SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
8376  SelectionDAG &DAG) const {
8377  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
8378  if (DAG.getTarget().useEmulatedTLS())
8379  return LowerToTLSEmulatedModel(GA, DAG);
8380 
8381  if (Subtarget->isTargetDarwin())
8382  return LowerDarwinGlobalTLSAddress(Op, DAG);
8383  if (Subtarget->isTargetELF())
8384  return LowerELFGlobalTLSAddress(Op, DAG);
8385  if (Subtarget->isTargetWindows())
8386  return LowerWindowsGlobalTLSAddress(Op, DAG);
8387 
8388  llvm_unreachable("Unexpected platform trying to use TLS");
8389 }
8390 
8391 // Looks through \param Val to determine the bit that can be used to
8392 // check the sign of the value. It returns the unextended value and
8393 // the sign bit position.
8394 std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
8395  if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
8396  return {Val.getOperand(0),
8397  cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
8398  1};
8399 
8400  if (Val.getOpcode() == ISD::SIGN_EXTEND)
8401  return {Val.getOperand(0),
8402  Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
8403 
8404  return {Val, Val.getValueSizeInBits() - 1};
8405 }
8406 
// Lower ISD::BR_CC for AArch64.
//
// Operands are (Chain, CondCode, LHS, RHS, Dest). Picks the cheapest branch
// form available: TBZ/TBNZ/CBZ/CBNZ for integer compares against zero (only
// when flag-free branches are allowed), a single compare + BRCOND otherwise,
// and possibly two BRCONDs for FP conditions that have no single AArch64
// condition code.
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc dl(Op);

  MachineFunction &MF = DAG.getMachineFunction();
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
  // will not be produced, as they are conditional branch instructions that do
  // not set flags.
  bool ProduceNonFlagSettingCondBr =
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);

  // Handle f128 first, since lowering it will result in comparing the return
  // value of a libcall against zero, which is just what the rest of LowerBR_CC
  // is expecting to deal with.
  if (LHS.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);

    // If softenSetCCOperands returned a scalar, we need to compare the result
    // against zero to select between true and false values.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, dl, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
  // instruction.
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
      return SDValue();

    // The actual operation with overflow check.
    AArch64CC::CondCode OFCC;
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);

    // Branching on "no overflow" means branching on the inverted condition.
    if (CC == ISD::SETNE)
      OFCC = getInvertedCondCode(OFCC);
    SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);

    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Overflow);
  }

  if (LHS.getValueType().isInteger()) {
    assert((LHS.getValueType() == RHS.getValueType()) &&
           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));

    // If the RHS of the comparison is zero, we can potentially fold this
    // to a specialized branch.
    const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
    if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
      if (CC == ISD::SETEQ) {
        // See if we can use a TBZ to fold in an AND as well.
        // TBZ has a smaller branch displacement than CBZ. If the offset is
        // out of bounds, a late MI-layer pass rewrites branches.
        // 403.gcc is an example that hits this case.
        if (LHS.getOpcode() == ISD::AND &&
            isa<ConstantSDNode>(LHS.getOperand(1)) &&
            isPowerOf2_64(LHS.getConstantOperandVal(1))) {
          // (and x, 2^N) == 0  -->  TBZ x, N
          SDValue Test = LHS.getOperand(0);
          uint64_t Mask = LHS.getConstantOperandVal(1);
          return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
                             Dest);
        }

        return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
      } else if (CC == ISD::SETNE) {
        // See if we can use a TBZ to fold in an AND as well.
        // TBZ has a smaller branch displacement than CBZ. If the offset is
        // out of bounds, a late MI-layer pass rewrites branches.
        // 403.gcc is an example that hits this case.
        if (LHS.getOpcode() == ISD::AND &&
            isa<ConstantSDNode>(LHS.getOperand(1)) &&
            isPowerOf2_64(LHS.getConstantOperandVal(1))) {
          // (and x, 2^N) != 0  -->  TBNZ x, N
          SDValue Test = LHS.getOperand(0);
          uint64_t Mask = LHS.getConstantOperandVal(1);
          return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
                             Dest);
        }

        return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
      } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
        // Don't combine AND since emitComparison converts the AND to an ANDS
        // (a.k.a. TST) and the test in the test bit and branch instruction
        // becomes redundant. This would also increase register pressure.
        // x < 0  -->  TBNZ on the sign bit.
        uint64_t SignBitPos;
        std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
        return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
                           DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
      }
    }
    // x > -1 (i.e. x >= 0)  -->  TBZ on the sign bit.
    if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
        LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
      // Don't combine AND since emitComparison converts the AND to an ANDS
      // (a.k.a. TST) and the test in the test bit and branch instruction
      // becomes redundant. This would also increase register pressure.
      uint64_t SignBitPos;
      std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
      return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
                         DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
    }

    // General case: materialize the compare and branch on its flags.
    SDValue CCVal;
    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Cmp);
  }

  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
         LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);

  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
  // clean. Some of them require two branches to implement.
  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
  AArch64CC::CondCode CC1, CC2;
  changeFPCCToAArch64CC(CC, CC1, CC2);
  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
  SDValue BR1 =
      DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
  if (CC2 != AArch64CC::AL) {
    // Second branch chained after the first, reusing the same compare.
    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
                       Cmp);
  }

  return BR1;
}
8543 
// Lower ISD::FCOPYSIGN using NEON/SVE bitwise selection.
//
// Builds a mask with every bit set except each element's sign bit, then uses
// AArch64ISD::BSP (bitwise select) to take the magnitude bits from In1 and the
// sign bit from In2. Scalars are worked on inside a vector register via
// subregister insert/extract.
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
                                              SelectionDAG &DAG) const {
  if (!Subtarget->hasNEON())
    return SDValue();

  EVT VT = Op.getValueType();
  EVT IntVT = VT.changeTypeToInteger();
  SDLoc DL(Op);

  SDValue In1 = Op.getOperand(0); // value providing the magnitude
  SDValue In2 = Op.getOperand(1); // value providing the sign
  EVT SrcVT = In2.getValueType();

  // The sign operand may be a different FP size; bring it to VT first.
  if (!SrcVT.bitsEq(VT))
    In2 = DAG.getFPExtendOrRound(In2, DL, VT);

  if (VT.isScalableVector())
    IntVT =

  if (VT.isFixedLengthVector() &&
    // Fixed-length SVE path: convert to a scalable container, recurse on the
    // FCOPYSIGN in the container type, and convert back.
    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

    In1 = convertToScalableVector(DAG, ContainerVT, In1);
    In2 = convertToScalableVector(DAG, ContainerVT, In2);

    SDValue Res = DAG.getNode(ISD::FCOPYSIGN, DL, ContainerVT, In1, In2);
    return convertFromScalableVector(DAG, VT, Res);
  }

  // Bitcast helper that stays safe for SVE predicated/packed types.
  auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) {
    if (VT.isScalableVector())
      return getSVESafeBitCast(VT, Op, DAG);

    return DAG.getBitcast(VT, Op);
  };

  SDValue VecVal1, VecVal2;
  EVT VecVT;
  // Places In1/In2 into vector values of type VecVT: scalars are inserted
  // into an undef vector at subregister Idx, vectors are bitcast.
  auto SetVecVal = [&](int Idx = -1) {
    if (!VT.isVector()) {
      VecVal1 =
          DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In1);
      VecVal2 =
          DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In2);
    } else {
      VecVal1 = BitCast(VecVT, In1, DAG);
      VecVal2 = BitCast(VecVT, In2, DAG);
    }
  };
  if (VT.isVector()) {
    VecVT = IntVT;
    SetVecVal();
  } else if (VT == MVT::f64) {
    VecVT = MVT::v2i64;
    SetVecVal(AArch64::dsub);
  } else if (VT == MVT::f32) {
    VecVT = MVT::v4i32;
    SetVecVal(AArch64::ssub);
  } else if (VT == MVT::f16) {
    VecVT = MVT::v8i16;
    SetVecVal(AArch64::hsub);
  } else {
    llvm_unreachable("Invalid type for copysign!");
  }

  // Mask selecting everything except the per-element sign bit.
  unsigned BitWidth = In1.getScalarValueSizeInBits();
  SDValue SignMaskV = DAG.getConstant(~APInt::getSignMask(BitWidth), DL, VecVT);

  // We want to materialize a mask with every bit but the high bit set, but the
  // AdvSIMD immediate moves cannot materialize that in a single instruction for
  // 64-bit elements. Instead, materialize all bits set and then negate that.
  if (VT == MVT::f64 || VT == MVT::v2f64) {
    SignMaskV = DAG.getConstant(APInt::getAllOnes(BitWidth), DL, VecVT);
    SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, SignMaskV);
    SignMaskV = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, SignMaskV);
    SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, SignMaskV);
  }

  // BSP: bits set in the mask come from VecVal1, the rest from VecVal2.
  SDValue BSP =
      DAG.getNode(AArch64ISD::BSP, DL, VecVT, SignMaskV, VecVal1, VecVal2);
  if (VT == MVT::f16)
    return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, BSP);
  if (VT == MVT::f32)
    return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, BSP);
  if (VT == MVT::f64)
    return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, BSP);

  return BitCast(VT, BSP, DAG);
}
8635 
// Lower ISD::CTPOP and ISD::PARITY.
//
// Scalar i32/i64/i128 popcounts round-trip through the vector unit
// (CNT + UADDLV); parity is popcount & 1. Vector popcounts are done bytewise
// with CNT and then widened back up with pairwise adds (UADDLP). Bails out
// (returns SDValue()) when NEON is unavailable, when floats are disallowed,
// or for i32 parity where a GPR EOR sequence is preferable.
SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
                                                 SelectionDAG &DAG) const {
      Attribute::NoImplicitFloat))
    return SDValue();

  if (!Subtarget->hasNEON())
    return SDValue();

  bool IsParity = Op.getOpcode() == ISD::PARITY;
  SDValue Val = Op.getOperand(0);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  // for i32, general parity function using EORs is more efficient compared to
  // using floating point
  if (VT == MVT::i32 && IsParity)
    return SDValue();

  // If there is no CNT instruction available, GPR popcount can
  // be more efficiently lowered to the following sequence that uses
  // AdvSIMD registers/instructions as long as the copies to/from
  // the AdvSIMD registers are cheap.
  //  FMOV    D0, X0        // copy 64-bit int to vector, high bits zero'd
  //  CNT     V0.8B, V0.8B  // 8xbyte pop-counts
  //  ADDV    B0, V0.8B     // sum 8xbyte pop-counts
  //  UMOV    X0, V0.B[0]   // copy byte result back to integer reg
  if (VT == MVT::i32 || VT == MVT::i64) {
    if (VT == MVT::i32)
      Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);

    SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
    // Sum the per-byte counts into a single scalar with UADDLV.
    SDValue UaddLV = DAG.getNode(
        DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);

    if (IsParity)
      UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
                           DAG.getConstant(1, DL, MVT::i32));

    if (VT == MVT::i64)
      UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
    return UaddLV;
  } else if (VT == MVT::i128) {
    // i128: same scheme with a full 16-byte vector.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);

    SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
    SDValue UaddLV = DAG.getNode(
        DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);

    if (IsParity)
      UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
                           DAG.getConstant(1, DL, MVT::i32));

    return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
  }

  assert(!IsParity && "ISD::PARITY of vector types not supported");

  if (VT.isScalableVector() ||
      Subtarget->forceStreamingCompatibleSVE()))
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);

  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
          VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
         "Unexpected type for custom ctpop lowering");

  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
  Val = DAG.getBitcast(VT8Bit, Val);
  Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);

  // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
  unsigned EltSize = 8;
  unsigned NumElts = VT.is64BitVector() ? 8 : 16;
  while (EltSize != VT.getScalarSizeInBits()) {
    EltSize *= 2;
    NumElts /= 2;
    MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
    Val = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
        DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
  }

  return Val;
}
8724 
// Lower ISD::CTTZ as BITREVERSE followed by CTLZ: the number of trailing
// zeros of x equals the number of leading zeros of bit-reversed x.
SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isScalableVector() ||
             VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()));

  SDLoc DL(Op);
  SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
  return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
8735 
// Lower ISD::SMAX/SMIN/UMAX/UMIN.
//
// SVE (scalable, or fixed-length routed through SVE) uses the predicated
// *_PRED nodes; everything else is expanded to a SETCC + SELECT pair using
// the condition code matching the min/max flavor.
SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
                                           SelectionDAG &DAG) const {

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();
  ISD::CondCode CC;
  // Map the min/max opcode to the compare condition used by the
  // SETCC + SELECT expansion below.
  switch (Opcode) {
  default:
    llvm_unreachable("Wrong instruction");
  case ISD::SMAX:
    CC = ISD::SETGT;
    break;
  case ISD::SMIN:
    CC = ISD::SETLT;
    break;
  case ISD::UMAX:
    CC = ISD::SETUGT;
    break;
  case ISD::UMIN:
    CC = ISD::SETULT;
    break;
  }

  if (VT.isScalableVector() ||
      VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
    switch (Opcode) {
    default:
      llvm_unreachable("Wrong instruction");
    case ISD::SMAX:
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED);
    case ISD::SMIN:
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED);
    case ISD::UMAX:
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED);
    case ISD::UMIN:
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED);
    }
  }

  // Generic expansion: select the first operand when the compare holds.
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
  return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
8782 
// Lower ISD::BITREVERSE for vector types.
//
// SVE-eligible types use the predicated BITREVERSE node. For the NEON i32/i64
// element types, reverse bytes within each element with REV32/REV64 on a
// byte vector, bit-reverse each byte (v8i8/v16i8 BITREVERSE), then NVCAST the
// byte vector back to the original element type.
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
                                               SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();

  if (VT.isScalableVector() ||
      VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);

  SDLoc DL(Op);
  SDValue REVB;
  MVT VST; // byte-vector type the element-wise reversal is performed in

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Invalid type for bitreverse!");

  case MVT::v2i32: {
    VST = MVT::v8i8;
    REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));

    break;
  }

  case MVT::v4i32: {
    VST = MVT::v16i8;
    REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));

    break;
  }

  case MVT::v1i64: {
    VST = MVT::v8i8;
    REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));

    break;
  }

  case MVT::v2i64: {
    VST = MVT::v16i8;
    REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));

    break;
  }
  }

  return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
                     DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
}
8832 
8833 // Check whether the continuous comparison sequence.
8834 static bool
8835 isOrXorChain(SDValue N, unsigned &Num,
8836  SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
8837  if (Num == MaxXors)
8838  return false;
8839 
8840  // Skip the one-use zext
8841  if (N->getOpcode() == ISD::ZERO_EXTEND && N->hasOneUse())
8842  N = N->getOperand(0);
8843 
8844  // The leaf node must be XOR
8845  if (N->getOpcode() == ISD::XOR) {
8846  WorkList.push_back(std::make_pair(N->getOperand(0), N->getOperand(1)));
8847  Num++;
8848  return true;
8849  }
8850 
8851  // All the non-leaf nodes must be OR.
8852  if (N->getOpcode() != ISD::OR || !N->hasOneUse())
8853  return false;
8854 
8855  if (isOrXorChain(N->getOperand(0), Num, WorkList) &&
8856  isOrXorChain(N->getOperand(1), Num, WorkList))
8857  return true;
8858  return false;
8859 }
8860 
// Transform chains of ORs and XORs (typically produced by memcmp/bcmp
// expansion) feeding an integer equality compare against zero into a
// conjunction of per-element compares, which can later be matched as
// SUBS/CCMP chains.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only handle integer compares.
  if (N->getOpcode() != ISD::SETCC)
    return SDValue();

  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
  // Try to express conjunction "cmp 0 (or (xor A0 A1) (xor B0 B1))" as:
  // sub A0, A1; ccmp B0, B1, 0, eq; cmp inv(Cond) flag
  unsigned NumXors = 0;
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
      LHS->getOpcode() == ISD::OR && LHS->hasOneUse() &&
      isOrXorChain(LHS, NumXors, WorkList)) {
    SDValue XOR0, XOR1;
    std::tie(XOR0, XOR1) = WorkList[0];
    // SETEQ: every pair must match (AND of compares);
    // SETNE: any pair may differ (OR of compares).
    unsigned LogicOp = (Cond == ISD::SETEQ) ? ISD::AND : ISD::OR;
    SDValue Cmp = DAG.getSetCC(DL, VT, XOR0, XOR1, Cond);
    // Fold the remaining XOR pairs into the combined compare.
    for (unsigned I = 1; I < WorkList.size(); I++) {
      std::tie(XOR0, XOR1) = WorkList[I];
      SDValue CmpChain = DAG.getSetCC(DL, VT, XOR0, XOR1, Cond);
      Cmp = DAG.getNode(LogicOp, DL, VT, Cmp, CmpChain);
    }

    // Exit early by inverting the condition, which help reduce indentations.
    return Cmp;
  }

  return SDValue();
}
8896 
// Lower ISD::SETCC / STRICT_FSETCC / STRICT_FSETCCS.
//
// Vector compares go to LowerVSETCC. Integer compares invert the condition
// and emit CSEL(0, 1) so the pattern matches a single CSINC. FP compares may
// need two CSELs when the LLVM condition has no single AArch64 equivalent;
// f128 is softened to a libcall first.
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {

  if (Op.getValueType().isVector())
    return LowerVSETCC(Op, DAG);

  // Strict FP nodes carry a chain operand before the compare operands.
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Chain;
  if (IsStrict)
    Chain = Op.getOperand(0);
  SDValue LHS = Op.getOperand(OpNo + 0);
  SDValue RHS = Op.getOperand(OpNo + 1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
  SDLoc dl(Op);

  // We chose ZeroOrOneBooleanContents, so use zero and one.
  EVT VT = Op.getValueType();
  SDValue TVal = DAG.getConstant(1, dl, VT);
  SDValue FVal = DAG.getConstant(0, dl, VT);

  // Handle f128 first, since one possible outcome is a normal integer
  // comparison which gets picked up by the next if statement.
  if (LHS.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
                        IsSignaling);

    // If softenSetCCOperands returned a scalar, use it.
    if (!RHS.getNode()) {
      assert(LHS.getValueType() == Op.getValueType() &&
             "Unexpected setcc expansion!");
      return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
    }
  }

  if (LHS.getValueType().isInteger()) {
    SDValue CCVal;
        LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);

    // Note that we inverted the condition above, so we reverse the order of
    // the true and false operands here. This will allow the setcc to be
    // matched to a single CSINC instruction.
    SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
    return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
  }

  // Now we know we're dealing with FP values.
  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
         LHS.getValueType() == MVT::f64);

  // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
  // and do the comparison.
  SDValue Cmp;
  if (IsStrict)
    Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
  else
    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);

  AArch64CC::CondCode CC1, CC2;
  changeFPCCToAArch64CC(CC, CC1, CC2);
  SDValue Res;
  if (CC2 == AArch64CC::AL) {
    // Single-condition case: invert and emit CSEL(0, 1) like the integer path.
    changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
                          CC2);
    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);

    // Note that we inverted the condition above, so we reverse the order of
    // the true and false operands here. This will allow the setcc to be
    // matched to a single CSINC instruction.
    Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
  } else {
    // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
    // totally clean. Some of them require two CSELs to implement. As is in
    // this case, we emit the first CSEL and then emit a second using the output
    // of the first as the RHS. We're effectively OR'ing the two CC's together.

    // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
    SDValue CS1 =
        DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);

    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
    Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
  }
  return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
}
8984 
// Lower ISD::SETCCCARRY (compare with carry-in) for i32/i64.
//
// Operands are (LHS, RHS, Carry, CondCode). The carry is converted to an
// AArch64 carry flag (inverted, since SBCS consumes a carry rather than a
// borrow), the subtract-with-carry produces the flags, and the result is a
// CSEL with swapped 0/1 operands on the inverted condition so it matches a
// single CSINC.
SDValue AArch64TargetLowering::LowerSETCCCARRY(SDValue Op,
                                               SelectionDAG &DAG) const {

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  EVT VT = LHS.getValueType();
  // Only i32/i64 are handled here; other types are left to generic code.
  if (VT != MVT::i32 && VT != MVT::i64)
    return SDValue();

  SDLoc DL(Op);
  SDValue Carry = Op.getOperand(2);
  // SBCS uses a carry not a borrow so the carry flag should be inverted first.
  SDValue InvCarry = valueToCarryFlag(Carry, DAG, true);
      LHS, RHS, InvCarry);

  EVT OpVT = Op.getValueType();
  SDValue TVal = DAG.getConstant(1, DL, OpVT);
  SDValue FVal = DAG.getConstant(0, DL, OpVT);

  ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get();
  ISD::CondCode CondInv = ISD::getSetCCInverse(Cond, VT);
  SDValue CCVal =
  // Inputs are swapped because the condition is inverted. This will allow
  // matching with a single CSINC instruction.
  return DAG.getNode(AArch64ISD::CSEL, DL, OpVT, FVal, TVal, CCVal,
                     Cmp.getValue(1));
}
9014 
9015 SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
9016  SDValue RHS, SDValue TVal,
9017  SDValue FVal, const SDLoc &dl,
9018  SelectionDAG &DAG) const {
9019  // Handle f128 first, because it will result in a comparison of some RTLIB
9020  // call result against zero.
9021  if (LHS.getValueType() == MVT::f128) {
9022  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
9023 
9024  // If softenSetCCOperands returned a scalar, we need to compare the result
9025  // against zero to select between true and false values.
9026  if (!RHS.getNode()) {
9027  RHS = DAG.getConstant(0, dl, LHS.getValueType());
9028  CC = ISD::SETNE;
9029  }
9030  }
9031 
9032  // Also handle f16, for which we need to do a f32 comparison.
9033  if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
9034  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
9035  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
9036  }
9037 
9038  // Next, handle integers.
9039  if (LHS.getValueType().isInteger()) {
9040  assert((LHS.getValueType() == RHS.getValueType()) &&
9041  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
9042 
9043  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
9044  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
9045  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
9046  // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
9047  // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
9048  // supported types.
9049  if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
9050  CTVal->isOne() && CFVal->isAllOnes() &&
9051  LHS.getValueType() == TVal.getValueType()) {
9052  EVT VT = LHS.getValueType();
9053  SDValue Shift =
9054  DAG.getNode(ISD::SRA, dl, VT, LHS,
9055  DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
9056  return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
9057  }
9058 
9059  unsigned Opcode = AArch64ISD::CSEL;
9060 
9061  // If both the TVal and the FVal are constants, see if we can swap them in
9062  // order to for a CSINV or CSINC out of them.
9063  if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
9064  std::swap(TVal, FVal);
9065  std::swap(CTVal, CFVal);
9066  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
9067  } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
9068  std::swap(TVal, FVal);
9069  std::swap(CTVal, CFVal);
9070  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
9071  } else if (TVal.getOpcode() == ISD::XOR) {
9072  // If TVal is a NOT we want to swap TVal and FVal so that we can match
9073  // with a CSINV rather than a CSEL.
9074  if (isAllOnesConstant(TVal.getOperand(1))) {
9075  std::swap(TVal, FVal);
9076  std::swap(CTVal, CFVal);
9077  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
9078  }
9079  } else if (TVal.getOpcode() == ISD::SUB) {
9080  // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
9081  // that we can match with a CSNEG rather than a CSEL.
9082  if (isNullConstant(TVal.getOperand(0))) {
9083  std::swap(TVal, FVal);
9084  std::swap(CTVal, CFVal);
9085  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
9086  }
9087  } else if (CTVal && CFVal) {
9088  const int64_t TrueVal = CTVal->getSExtValue();
9089  const int64_t FalseVal = CFVal->getSExtValue();
9090  bool Swap = false;
9091 
9092  // If both TVal and FVal are constants, see if FVal is the
9093  // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
9094  // instead of a CSEL in that case.
9095  if (TrueVal == ~FalseVal) {
9096  Opcode = AArch64ISD::CSINV;
9098  TrueVal == -FalseVal) {
9099  Opcode = AArch64ISD::CSNEG;
9100  } else if (TVal.getValueType() == MVT::i32) {
9101  // If our operands are only 32-bit wide, make sure we use 32-bit
9102  // arithmetic for the check whether we can use CSINC. This ensures that
9103  // the addition in the check will wrap around properly in case there is
9104  // an overflow (which would not be the case if we do the check with
9105  // 64-bit arithmetic).
9106  const uint32_t TrueVal32 = CTVal->getZExtValue();
9107  const uint32_t FalseVal32 = CFVal->getZExtValue();
9108 
9109  if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
9110  Opcode = AArch64ISD::CSINC;
9111 
9112  if (TrueVal32 > FalseVal32) {
9113  Swap = true;
9114  }
9115  }
9116  } else {
9117  // 64-bit check whether we can use CSINC.
9118  const uint64_t TrueVal64 = TrueVal;
9119  const uint64_t FalseVal64 = FalseVal;
9120 
9121  if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
9122  Opcode = AArch64ISD::CSINC;
9123 
9124  if (TrueVal > FalseVal) {
9125  Swap = true;
9126  }
9127  }
9128  }
9129 
9130  // Swap TVal and FVal if necessary.
9131  if (Swap) {
9132  std::swap(TVal, FVal);
9133  std::swap(CTVal, CFVal);
9134  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
9135  }
9136 
9137  if (Opcode != AArch64ISD::CSEL) {
9138  // Drop FVal since we can get its value by simply inverting/negating
9139  // TVal.
9140  FVal = TVal;
9141  }
9142  }
9143 
9144  // Avoid materializing a constant when possible by reusing a known value in
9145  // a register. However, don't perform this optimization if the known value
9146  // is one, zero or negative one in the case of a CSEL. We can always
9147  // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
9148  // FVal, respectively.
9149  ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
9150  if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
9151  !RHSVal->isZero() && !RHSVal->isAllOnes()) {
9153  // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
9154  // "a != C ? x : a" to avoid materializing C.
9155  if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
9156  TVal = LHS;
9157  else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
9158  FVal = LHS;
9159  } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
9160  assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
9161  // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
9162  // avoid materializing C.
9164  if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
9165  Opcode = AArch64ISD::CSINV;
9166  TVal = LHS;
9167  FVal = DAG.getConstant(0, dl, FVal.getValueType());
9168  }
9169  }
9170 
9171  SDValue CCVal;
9172  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
9173  EVT VT = TVal.getValueType();
9174  return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
9175  }
9176 
9177  // Now we know we're dealing with FP values.
9178  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
9179  LHS.getValueType() == MVT::f64);
9180  assert(LHS.getValueType() == RHS.getValueType());
9181  EVT VT = TVal.getValueType();
9182  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
9183 
9184  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
9185  // clean. Some of them require two CSELs to implement.
9186  AArch64CC::CondCode CC1, CC2;
9187  changeFPCCToAArch64CC(CC, CC1, CC2);
9188 
9189  if (DAG.getTarget().Options.UnsafeFPMath) {
9190  // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
9191  // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
9192  ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
9193  if (RHSVal && RHSVal->isZero()) {
9194  ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
9195  ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
9196 
9197  if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
9198  CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
9199  TVal = LHS;
9200  else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
9201  CFVal && CFVal->isZero() &&
9202  FVal.getValueType() == LHS.getValueType())
9203  FVal = LHS;
9204  }
9205  }
9206 
9207  // Emit first, and possibly only, CSEL.
9208  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
9209  SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
9210 
9211  // If we need a second CSEL, emit it, using the output of the first as the
9212  // RHS. We're effectively OR'ing the two CC's together.
9213  if (CC2 != AArch64CC::AL) {
9214  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
9215  return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
9216  }
9217 
9218  // Otherwise, return the output of the first CSEL.
9219  return CS1;
9220 }
9221 
9222 SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
9223  SelectionDAG &DAG) const {
9224  EVT Ty = Op.getValueType();
9225  auto Idx = Op.getConstantOperandAPInt(2);
9226  int64_t IdxVal = Idx.getSExtValue();
9227  assert(Ty.isScalableVector() &&
9228  "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
9229 
9230  // We can use the splice instruction for certain index values where we are
9231  // able to efficiently generate the correct predicate. The index will be
9232  // inverted and used directly as the input to the ptrue instruction, i.e.
9233  // -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
9234  // splice predicate. However, we can only do this if we can guarantee that
9235  // there are enough elements in the vector, hence we check the index <= min
9236  // number of elements.
9237  std::optional<unsigned> PredPattern;
9238  if (Ty.isScalableVector() && IdxVal < 0 &&
9239  (PredPattern = getSVEPredPatternFromNumElements(std::abs(IdxVal))) !=
9240  std::nullopt) {
9241  SDLoc DL(Op);
9242 
9243  // Create a predicate where all but the last -IdxVal elements are false.
9244  EVT PredVT = Ty.changeVectorElementType(MVT::i1);
9245  SDValue Pred = getPTrue(DAG, DL, PredVT, *PredPattern);
9246  Pred = DAG.getNode(ISD::VECTOR_REVERSE, DL, PredVT, Pred);
9247 
9248  // Now splice the two inputs together using the predicate.
9249  return DAG.getNode(AArch64ISD::SPLICE, DL, Ty, Pred, Op.getOperand(0),
9250  Op.getOperand(1));
9251  }
9252 
9253  // This will select to an EXT instruction, which has a maximum immediate
9254  // value of 255, hence 2048-bits is the maximum value we can lower.
9255  if (IdxVal >= 0 &&
9256  IdxVal < int64_t(2048 / Ty.getVectorElementType().getSizeInBits()))
9257  return Op;
9258 
9259  return SDValue();
9260 }
9261 
9262 SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
9263  SelectionDAG &DAG) const {
9264  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
9265  SDValue LHS = Op.getOperand(0);
9266  SDValue RHS = Op.getOperand(1);
9267  SDValue TVal = Op.getOperand(2);
9268  SDValue FVal = Op.getOperand(3);
9269  SDLoc DL(Op);
9270  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
9271 }
9272 
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
                                           SelectionDAG &DAG) const {
  // select(cond, t, f) lowering. Operands: 0 = scalar condition, 1 = value
  // when true, 2 = value when false.
  SDValue CCVal = Op->getOperand(0);
  SDValue TVal = Op->getOperand(1);
  SDValue FVal = Op->getOperand(2);
  SDLoc DL(Op);

  EVT Ty = Op.getValueType();
  if (Ty.isScalableVector()) {
    // Scalable-vector result: splat the (truncated-to-i1) scalar condition
    // into a predicate vector and select element-wise with VSELECT.
    SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
    SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
    return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
  }

      Subtarget->forceStreamingCompatibleSVE())) {
    // FIXME: Ideally this would be the same as above using i1 types, however
    // for the moment we can't deal with fixed i1 vector types properly, so
    // instead extend the predicate to a result type sized integer vector.
    MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
    MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
    SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
    SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
    return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
  }

  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
  // instruction.
  if (ISD::isOverflowIntrOpRes(CCVal)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
      return SDValue();

    AArch64CC::CondCode OFCC;
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
    // Reuse the overflow flag directly as the CSEL condition.
    SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);

    return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
                       CCVal, Overflow);
  }

  // Lower it the same way as we would lower a SELECT_CC node.
  ISD::CondCode CC;
  SDValue LHS, RHS;
  if (CCVal.getOpcode() == ISD::SETCC) {
    // Fold the setcc's operands and condition straight into the compare.
    LHS = CCVal.getOperand(0);
    RHS = CCVal.getOperand(1);
    CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
  } else {
    // Otherwise compare the condition against zero: select(c,...) behaves as
    // (c != 0).
    LHS = CCVal;
    RHS = DAG.getConstant(0, DL, CCVal.getValueType());
    CC = ISD::SETNE;
  }

  // If we are lowering an f16 and we do not have FullFP16, convert to an f32
  // in order to use FCSELSrrr
  if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
    // Widen both select values into the hsub lane of an undef f32.
    TVal = SDValue(
        DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
                           DAG.getUNDEF(MVT::f32), TVal,
                           DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
        0);
    FVal = SDValue(
        DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
                           DAG.getUNDEF(MVT::f32), FVal,
                           DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
        0);
  }

  SDValue Res = LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);

  // If we widened above, extract the half-precision result back out of the
  // f32 select.
  if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
    Res = SDValue(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, Ty, Res,
                           DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
        0);
  }

  return Res;
}
9355 
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
                                              SelectionDAG &DAG) const {
  // Jump table entries as PC relative offsets. No additional tweaking
  // is necessary here. Just get the address of the jump table.
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

      !Subtarget->isTargetMachO()) {
    // Use the large-code-model address sequence.
    return getAddrLarge(JT, DAG);
  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
    // Tiny code model gets its own shorter address sequence.
    return getAddrTiny(JT, DAG);
  }
  // Default code model.
  return getAddr(JT, DAG);
}
9370 
9371 SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
9372  SelectionDAG &DAG) const {
9373  // Jump table entries as PC relative offsets. No additional tweaking
9374  // is necessary here. Just get the address of the jump table.
9375  SDLoc DL(Op);
9376  SDValue JT = Op.getOperand(1);
9377  SDValue Entry = Op.getOperand(2);
9378  int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
9379 
9380  auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
9381  AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
9382 
9383  SDNode *Dest =
9384  DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
9385  Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
9386  return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
9387  SDValue(Dest, 0));
9388 }
9389 
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Materialize the address of a constant-pool entry; the addressing
  // sequence chosen depends on the code model and target platform.
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);

    // Use the GOT for the large code model on iOS.
    if (Subtarget->isTargetMachO()) {
      return getGOT(CP, DAG);
    }
    return getAddrLarge(CP, DAG);
  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
    return getAddrTiny(CP, DAG);
  } else {
    // Default code model.
    return getAddr(CP, DAG);
  }
}
9406 
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Compute the address of a basic block; the address sequence depends on
  // the code model (large / tiny / default), mirroring LowerJumpTable.
  BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
      !Subtarget->isTargetMachO()) {
    return getAddrLarge(BA, DAG);
  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
    return getAddrTiny(BA, DAG);
  }
  return getAddr(BA, DAG);
}
9418 
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Darwin's va_list is a single pointer, so va_start just stores the
  // address of the first variadic stack slot through the given pointer.
  AArch64FunctionInfo *FuncInfo =

  SDLoc DL(Op);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
                                 getPointerTy(DAG.getDataLayout()));
  // Adjust to the in-memory pointer width (differs from the register width
  // under ILP32).
  FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
9432 
SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Windows also uses a single-pointer va_list; point it at the GPR save
  // area when one exists, otherwise at the variadic stack area.
  MachineFunction &MF = DAG.getMachineFunction();

  SDLoc DL(Op);
  SDValue FR;
  if (Subtarget->isWindowsArm64EC()) {
    // With the Arm64EC ABI, we compute the address of the varargs save area
    // relative to x4. For a normal AArch64->AArch64 call, x4 == sp on entry,
    // but calls from an entry thunk can pass in a different address.
    Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
    SDValue Val = DAG.getCopyFromReg(DAG.getEntryNode(), DL, VReg, MVT::i64);
    if (FuncInfo->getVarArgsGPRSize() > 0)
      // The GPR save area sits immediately below the x4 base.
      StackOffset = -(uint64_t)FuncInfo->getVarArgsGPRSize();
    else
      StackOffset = FuncInfo->getVarArgsStackOffset();
    FR = DAG.getNode(ISD::ADD, DL, MVT::i64, Val,
  } else {
    // Regular Win64: address the save area through a frame index.
    FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
                               ? FuncInfo->getVarArgsGPRIndex()
                               : FuncInfo->getVarArgsStackIndex(),
                           getPointerTy(DAG.getDataLayout()));
  }
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
9463 
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // The layout of the va_list struct is specified in the AArch64 Procedure Call
  // Standard, section B.3.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
  auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);

  SDValue Chain = Op.getOperand(0);
  SDValue VAList = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  // One store per struct field; glued together with a TokenFactor at the end.
  SmallVector<SDValue, 4> MemOps;

  // void *__stack at offset 0
  unsigned Offset = 0;
  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
  Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
  MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
                                MachinePointerInfo(SV), Align(PtrSize)));

  // void *__gr_top at offset 8 (4 on ILP32)
  Offset += PtrSize;
  int GPRSize = FuncInfo->getVarArgsGPRSize();
  if (GPRSize > 0) {
    SDValue GRTop, GRTopAddr;

    GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                            DAG.getConstant(Offset, DL, PtrVT));

    // __gr_top points one-past-the-end of the GPR save area.
    GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
    GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
                        DAG.getConstant(GPRSize, DL, PtrVT));
    GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);

    MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
                                  Align(PtrSize)));
  }

  // void *__vr_top at offset 16 (8 on ILP32)
  Offset += PtrSize;
  int FPRSize = FuncInfo->getVarArgsFPRSize();
  if (FPRSize > 0) {
    SDValue VRTop, VRTopAddr;
    VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                            DAG.getConstant(Offset, DL, PtrVT));

    // __vr_top likewise points one-past-the-end of the FPR save area.
    VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
    VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
                        DAG.getConstant(FPRSize, DL, PtrVT));
    VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);

    MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
                                  Align(PtrSize)));
  }

  // int __gr_offs at offset 24 (12 on ILP32)
  Offset += PtrSize;
  SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                                   DAG.getConstant(Offset, DL, PtrVT));
  // Per AAPCS B.3, __gr_offs starts at -GPRSize (bytes of GPR save area).
  MemOps.push_back(
      DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
                   GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));

  // int __vr_offs at offset 28 (16 on ILP32)
  Offset += 4;
  SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                                   DAG.getConstant(Offset, DL, PtrVT));
  // And __vr_offs starts at -FPRSize.
  MemOps.push_back(
      DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
                   VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
9542 
9543 SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
9544  SelectionDAG &DAG) const {
9545  MachineFunction &MF = DAG.getMachineFunction();
9546 
9547  if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
9548  return LowerWin64_VASTART(Op, DAG);
9549  else if (Subtarget->isTargetDarwin())
9550  return LowerDarwin_VASTART(Op, DAG);
9551  else
9552  return LowerAAPCS_VASTART(Op, DAG);
9553 }
9554 
9555 SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
9556  SelectionDAG &DAG) const {
9557  // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
9558  // pointer.
9559  SDLoc DL(Op);
9560  unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
9561  unsigned VaListSize =
9562  (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
9563  ? PtrSize
9564  : Subtarget->isTargetILP32() ? 20 : 32;
9565  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
9566  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
9567 
9568  return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
9569  DAG.getConstant(VaListSize, DL, MVT::i32),
9570  Align(PtrSize), false, false, false,
9571  MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
9572 }
9573 
SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  // Lower a va_arg read from a Darwin (single-pointer) va_list: load the
  // current list pointer, align it if the argument requires it, advance the
  // stored pointer past the slot, then load the argument value itself.
  assert(Subtarget->isTargetDarwin() &&
         "automatic va_arg instruction only works on Darwin");

  const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  MaybeAlign Align(Op.getConstantOperandVal(3));
  unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
  // Load the current va_list pointer (in its in-memory width).
  SDValue VAList =
      DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
  Chain = VAList.getValue(1);
  VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);

  if (VT.isScalableVector())
    report_fatal_error("Passing SVE types to variadic functions is "
                       "currently not supported");

  if (Align && *Align > MinSlotSize) {
    // Round VAList up to the requested alignment: (p + a - 1) & -a.
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(Align->value() - 1, DL, PtrVT));
    VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
                         DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
  }

  Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
  unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);

  // Scalar integer and FP values smaller than 64 bits are implicitly extended
  // up to 64 bits. At the very least, we have to increase the striding of the
  // vaargs list to match this, and for FP values we need to introduce
  // FP_ROUND nodes as well.
  if (VT.isInteger() && !VT.isVector())
    ArgSize = std::max(ArgSize, MinSlotSize);
  bool NeedFPTrunc = false;
  if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
    ArgSize = 8;
    NeedFPTrunc = true;
  }

  // Increment the pointer, VAList, to the next vaarg
  SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                               DAG.getConstant(ArgSize, DL, PtrVT));
  VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);

  // Store the incremented VAList to the legalized pointer
  SDValue APStore =
      DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));

  // Load the actual argument out of the pointer VAList
  if (NeedFPTrunc) {
    // Load the value as an f64.
    SDValue WideFP =
        DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
    // Round the value down to an f32.
    SDValue NarrowFP =
        DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
                    DAG.getIntPtrConstant(1, DL, /*isTarget=*/true));
    SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
    // Merge the rounded value with the chain output of the load.
    return DAG.getMergeValues(Ops, DL);
  }

  return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
}
9643 
SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
                                              SelectionDAG &DAG) const {
  MFI.setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  // Operand 0 is the number of frames to walk up from the current one.
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDValue FrameAddr =
      DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
  // Each parent frame's address is obtained by loading through the current
  // frame pointer.
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo());

  if (Subtarget->isTargetILP32())
    // Under ILP32 the result type is 32-bit; assert the upper bits of the
    // 64-bit FP copy are zero.
    FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
                            DAG.getValueType(VT));

  return FrameAddr;
}
9664 
SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
                                              SelectionDAG &DAG) const {

  EVT VT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);
  // A fixed object at offset 0 represents the stack pointer value on entry
  // to the function; return its address as a frame index.
  int FI = MFI.CreateFixedObject(4, 0, false);
  return DAG.getFrameIndex(FI, VT);
}
9674 
9675 #define GET_REGISTER_MATCHER
9676 #include "AArch64GenAsmMatcher.inc"
9677 
9678 // FIXME? Maybe this could be a TableGen attribute on some registers and
9679 // this table could be generated automatically from RegInfo.
Register AArch64TargetLowering::
getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
  if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
    // x1-x28 may only be referenced by name when the subtarget has reserved
    // them; otherwise reject the request below.
    const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
    unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
    if (!Subtarget->isXRegisterReserved(DwarfRegNum))
      Reg = 0;
  }
  if (Reg)
    return Reg;
  // Unknown or non-reserved register name: hard error.
  report_fatal_error(Twine("Invalid register name \""
                           + StringRef(RegName) + "\"."));
}
9694 
SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
                                                     SelectionDAG &DAG) const {

  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // The return-address slot is addressed relative to the frame pointer, so
  // the result is FP plus a fixed offset.
  SDValue FrameAddr =
      DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);

  return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
}
9708 
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  // Operand 0 selects how many frames up the call stack to look.
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDValue ReturnAddress;
  if (Depth) {
    // For a parent frame, load the saved return address at a fixed offset
    // from that frame's address.
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    ReturnAddress = DAG.getLoad(
        VT, DL, DAG.getEntryNode(),
        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
  } else {
    // Return LR, which contains the return address. Mark it an implicit
    // live-in.
    Register Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
    ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
  }

  // The XPACLRI instruction assembles to a hint-space instruction before
  // Armv8.3-A therefore this instruction can be safely used for any pre
  // Armv8.3-A architectures. On Armv8.3-A and onwards XPACI is available so use
  // that instead.
  SDNode *St;
  if (Subtarget->hasPAuth()) {
    // Strip any pointer-authentication code from the address in place.
    St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
  } else {
    // XPACLRI operates on LR therefore we must move the operand accordingly.
    SDValue Chain =
        DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
    St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
  }
  return SDValue(St, 0);
}
9747 
9748 /// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which returns two
9749 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
9750 SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
9751  SelectionDAG &DAG) const {
9752  SDValue Lo, Hi;
9753  expandShiftParts(Op.getNode(), Lo, Hi, DAG);
9754  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
9755 }
9756 
                                           const GlobalAddressSDNode *GA) const {
  // Offsets are folded in the DAG combine rather than here so that we can
  // intelligently choose an offset based on the uses.
  // Returning false keeps the generic folder from attaching offsets to
  // global addresses early.
  return false;
}
9763 
                                        bool OptForSize) const {
  bool IsLegal = false;
  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
  // 16-bit case when target has full fp16 support.
  // FIXME: We should be able to handle f128 as well with a clever lowering.
  const APInt ImmInt = Imm.bitcastToAPInt();
  if (VT == MVT::f64)
    IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
  else if (VT == MVT::f32)
    IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
  else if (VT == MVT::f16)
    IsLegal =
        (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
        Imm.isPosZero();

  // If we can not materialize in immediate field for fmov, check if the
  // value can be encoded as the immediate operand of a logical instruction.
  // The immediate value will be created with either MOVZ, MOVN, or ORR.
  // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
  // generate that fmov.
  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
    // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
    // however the mov+fmov sequence is always better because of the reduced
    // cache pressure. The timings are still the same if you consider
    // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
    // movw+movk is fused). So we limit up to 2 instructions at most.
    unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
    IsLegal = Insn.size() <= Limit;
  }

  LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT
                    << " imm value: "; Imm.dump(););
  return IsLegal;
}
9801 
9802 //===----------------------------------------------------------------------===//
9803 // AArch64 Optimization Hooks
9804 //===----------------------------------------------------------------------===//
9805 
9806 static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
9807  SDValue Operand, SelectionDAG &DAG,
9808  int &ExtraSteps) {
9809  EVT VT = Operand.getValueType();
9810  if ((ST->hasNEON() &&
9811  (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
9812  VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
9813  VT == MVT::v4f32)) ||
9814  (ST->hasSVE() &&
9815  (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
9816  if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
9817  // For the reciprocal estimates, convergence is quadratic, so the number
9818  // of digits is doubled after each iteration. In ARMv8, the accuracy of
9819  // the initial estimate is 2^-8. Thus the number of extra steps to refine
9820  // the result for float (23 mantissa bits) is 2 and for double (52
9821  // mantissa bits) is 3.
9822  ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
9823 
9824  return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
9825  }
9826 
9827  return SDValue();
9828 }
9829 
9830 SDValue
9831 AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
9832  const DenormalMode &Mode) const {
9833  SDLoc DL(Op);
9834  EVT VT = Op.getValueType();
9835  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9836  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
9837  return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
9838 }
9839 
9840 SDValue
9841 AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
9842  SelectionDAG &DAG) const {
9843  return Op;
9844 }
9845 
// Build an FRSQRTE-based estimate of 1/sqrt(x) (or sqrt(x) when !Reciprocal),
// refined with Newton iterations via FRSQRTS.
SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
                                               SelectionDAG &DAG, int Enabled,
                                               int &ExtraSteps,
                                               bool &UseOneConst,
                                               bool Reciprocal) const {
    (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
                                     DAG, ExtraSteps)) {
    SDLoc DL(Operand);
    EVT VT = Operand.getValueType();

    Flags.setAllowReassociation(true);

    // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
    // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
    for (int i = ExtraSteps; i > 0; --i) {
      SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
                                 Flags);
      Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
      Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
    }
    // For a plain sqrt, multiply by the operand: sqrt(x) = x * rsqrt(x).
    if (!Reciprocal)
      Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);

    // All requested refinement has been applied inline.
    ExtraSteps = 0;
    return Estimate;
  }

  return SDValue();
}
9878 
// Build an FRECPE-based estimate of 1/x, refined with Newton iterations via
// FRECPS.
SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
                                                SelectionDAG &DAG, int Enabled,
                                                int &ExtraSteps) const {
  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
                                     DAG, ExtraSteps)) {
    SDLoc DL(Operand);
    EVT VT = Operand.getValueType();

    Flags.setAllowReassociation(true);

    // Newton reciprocal iteration: E * (2 - X * E)
    // AArch64 reciprocal iteration instruction: (2 - M * N)
    for (int i = ExtraSteps; i > 0; --i) {
      SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
                                 Estimate, Flags);
      Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
    }

    // All requested refinement has been applied inline.
    ExtraSteps = 0;
    return Estimate;
  }

  return SDValue();
}
9905 
9906 //===----------------------------------------------------------------------===//
9907 // AArch64 Inline Assembly Support
9908 //===----------------------------------------------------------------------===//
9909 
9910 // Table of Constraints
9911 // TODO: This is the current set of constraints supported by ARM for the
9912 // compiler, not all of them may make sense.
9913 //
9914 // r - A general register
9915 // w - An FP/SIMD register of some size in the range v0-v31
9916 // x - An FP/SIMD register of some size in the range v0-v15
9917 // I - Constant that can be used with an ADD instruction
9918 // J - Constant that can be used with a SUB instruction
9919 // K - Constant that can be used with a 32-bit logical instruction
9920 // L - Constant that can be used with a 64-bit logical instruction
9921 // M - Constant that can be used as a 32-bit MOV immediate
9922 // N - Constant that can be used as a 64-bit MOV immediate
9923 // Q - A memory reference with base register and no offset
9924 // S - A symbolic address
9925 // Y - Floating point constant zero
9926 // Z - Integer constant zero
9927 //
9928 // Note that general register operands will be output using their 64-bit x
9929 // register name, whatever the size of the variable, unless the asm operand
9930 // is prefixed by the %w modifier. Floating-point and SIMD register operands
9931 // will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
9932 // %q modifier.
9933 const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
9934  // At this point, we have to lower this constraint to something else, so we
9935  // lower it to an "r" or "w". However, by doing this we will force the result
9936  // to be in register, while the X constraint is much more permissive.
9937  //
9938  // Although we are correct (we are free to emit anything, without
9939  // constraints), we might break use cases that would expect us to be more
9940  // efficient and emit something else.
9941  if (!Subtarget->hasFPARMv8())
9942  return "r";
9943 
9944  if (ConstraintVT.isFloatingPoint())
9945  return "w";
9946 
9947  if (ConstraintVT.isVector() &&
9948  (ConstraintVT.getSizeInBits() == 64 ||
9949  ConstraintVT.getSizeInBits() == 128))
9950  return "w";
9951 
9952  return "r";
9953 }
9954 
9958  Invalid
9959 };
9960 
9963  if (Constraint == "Upa")
9965  if (Constraint == "Upl")
9967  return P;
9968 }
9969 
9970 /// getConstraintType - Given a constraint letter, return the type of
9971 /// constraint it is for this target.
9973 AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
9974  if (Constraint.size() == 1) {
9975  switch (Constraint[0]) {
9976  default:
9977  break;
9978  case 'x':
9979  case 'w':
9980  case 'y':
9981  return C_RegisterClass;
9982  // An address with a single base register. Due to the way we
9983  // currently handle addresses it is the same as 'r'.
9984  case 'Q':
9985  return C_Memory;
9986  case 'I':
9987  case 'J':
9988  case 'K':
9989  case 'L':
9990  case 'M':
9991  case 'N':
9992  case 'Y':
9993  case 'Z':
9994  return C_Immediate;
9995  case 'z':
9996  case 'S': // A symbolic address
9997  return C_Other;
9998  }
9999  } else if (parsePredicateConstraint(Constraint) !=
10001  return C_RegisterClass;
10002  return TargetLowering::getConstraintType(Constraint);
10003 }
10004 
10005 /// Examine constraint type and operand type and determine a weight value.
10006 /// This object must already have been set up with the operand type
10007 /// and the current alternative constraint selected.
10009 AArch64TargetLowering::getSingleConstraintMatchWeight(
10010  AsmOperandInfo &info, const char *constraint) const {
10011  ConstraintWeight weight = CW_Invalid;
10012  Value *CallOperandVal = info.CallOperandVal;
10013  // If we don't have a value, we can't do a match,
10014  // but allow it at the lowest weight.
10015  if (!CallOperandVal)
10016  return CW_Default;
10017  Type *type = CallOperandVal->getType();
10018  // Look at the constraint type.
10019  switch (*constraint) {
10020  default:
10022  break;
10023  case 'x':
10024  case 'w':
10025  case 'y':
10026  if (type->isFloatingPointTy() || type->isVectorTy())
10027  weight = CW_Register;
10028  break;
10029  case 'z':
10030  weight = CW_Constant;
10031  break;
10032  case 'U':
10034  weight = CW_Register;
10035  break;
10036  }
10037  return weight;
10038 }
10039 
10040 std::pair<unsigned, const TargetRegisterClass *>
10041 AArch64TargetLowering::getRegForInlineAsmConstraint(
10042  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
10043  if (Constraint.size() == 1) {
10044  switch (Constraint[0]) {
10045  case 'r':
10046  if (VT.isScalableVector())
10047  return std::make_pair(0U, nullptr);
10048  if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
10049  return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
10050  if (VT.getFixedSizeInBits() == 64)
10051  return std::make_pair(0U, &AArch64::GPR64commonRegClass);
10052  return std::make_pair(0U, &AArch64::GPR32commonRegClass);
10053  case 'w': {
10054  if (!Subtarget->hasFPARMv8())
10055  break;
10056  if (VT.isScalableVector()) {
10057  if (VT.getVectorElementType() != MVT::i1)
10058  return std::make_pair(0U, &AArch64::ZPRRegClass);
10059  return std::make_pair(0U, nullptr);
10060  }
10061  uint64_t VTSize = VT.getFixedSizeInBits();
10062  if (VTSize == 16)
10063  return std::make_pair(0U, &AArch64::FPR16RegClass);
10064  if (VTSize == 32)
10065  return std::make_pair(0U, &AArch64::FPR32RegClass);
10066  if (VTSize == 64)
10067  return std::make_pair(0U, &AArch64::FPR64RegClass);
10068  if (VTSize == 128)
10069  return std::make_pair(0U, &AArch64::FPR128RegClass);
10070  break;
10071  }
10072  // The instructions that this constraint is designed for can
10073  // only take 128-bit registers so just use that regclass.
10074  case 'x':
10075  if (!Subtarget->hasFPARMv8())
10076  break;
10077  if (VT.isScalableVector())
10078  return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
10079  if (VT.getSizeInBits() == 128)
10080  return std::make_pair(0U, &AArch64::FPR128_loRegClass);
10081  break;
10082  case 'y':
10083  if (!Subtarget->hasFPARMv8())
10084  break;
10085  if (VT.isScalableVector())
10086  return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
10087  break;
10088  }
10089  } else {
10091  if (PC != PredicateConstraint::Invalid) {
10092  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
10093  return std::make_pair(0U, nullptr);
10094  bool restricted = (PC == PredicateConstraint::Upl);
10095  return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
10096  : std::make_pair(0U, &AArch64::PPRRegClass);
10097  }
10098  }
10099  if (StringRef("{cc}").equals_insensitive(Constraint))
10100  return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
10101 
10102  // Use the default implementation in TargetLowering to convert the register
10103  // constraint into a member of a register class.
10104  std::pair<unsigned, const TargetRegisterClass *> Res;
10105  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
10106 
10107  // Not found as a standard register?
10108  if (!Res.second) {
10109  unsigned Size = Constraint.size();
10110  if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
10111  tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
10112  int RegNo;
10113  bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
10114  if (!Failed && RegNo >= 0 && RegNo <= 31) {
10115  // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
10116  // By default we'll emit v0-v31 for this unless there's a modifier where
10117  // we'll emit the correct register as well.
10118  if (VT != MVT::Other && VT.getSizeInBits() == 64) {
10119  Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
10120  Res.second = &AArch64::FPR64RegClass;
10121  } else {
10122  Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
10123  Res.second = &AArch64::FPR128RegClass;
10124  }
10125  }
10126  }
10127  }
10128 
10129  if (Res.second && !Subtarget->hasFPARMv8() &&
10130  !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
10131  !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
10132  return std::make_pair(0U, nullptr);
10133 
10134  return Res;
10135 }
10136 
10138  llvm::Type *Ty,
10139  bool AllowUnknown) const {
10140  if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
10141  return EVT(MVT::i64x8);
10142 
10143  return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
10144 }
10145 
10146 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
10147 /// vector. If it is invalid, don't add anything to Ops.
10148 void AArch64TargetLowering::LowerAsmOperandForConstraint(
10149  SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
10150  SelectionDAG &DAG) const {
10151  SDValue Result;
10152 
10153  // Currently only support length 1 constraints.
10154  if (Constraint.length() != 1)
10155  return;
10156 
10157  char ConstraintLetter = Constraint[0];
10158  switch (ConstraintLetter) {
10159  default:
10160  break;
10161 
10162  // This set of constraints deal with valid constants for various instructions.
10163  // Validate and return a target constant for them if we can.
10164  case 'z': {
10165  // 'z' maps to xzr or wzr so it needs an input of 0.
10166  if (!isNullConstant(Op))
10167  return;
10168 
10169  if (Op.getValueType() == MVT::i64)
10170  Result = DAG.getRegister(AArch64::XZR, MVT::i64);
10171  else
10172  Result = DAG.getRegister(AArch64::WZR, MVT::i32);
10173  break;
10174  }
10175  case 'S': {
10176  // An absolute symbolic address or label reference.
10177  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
10178  Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
10179  GA->getValueType(0));
10180  } else if (const BlockAddressSDNode *BA =
10181  dyn_cast<BlockAddressSDNode>(Op)) {
10182  Result =
10184  } else
10185  return;
10186  break;
10187  }
10188 
10189  case 'I':
10190  case 'J':
10191  case 'K':
10192  case 'L':
10193  case 'M':
10194  case 'N':
10195  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
10196  if (!C)
10197  return;
10198 
10199  // Grab the value and do some validation.
10200  uint64_t CVal = C->getZExtValue();
10201  switch (ConstraintLetter) {
10202  // The I constraint applies only to simple ADD or SUB immediate operands:
10203  // i.e. 0 to 4095 with optional shift by 12
10204  // The J constraint applies only to ADD or SUB immediates that would be
10205  // valid when negated, i.e. if [an add pattern] were to be output as a SUB
10206  // instruction [or vice versa], in other words -1 to -4095 with optional
10207  // left shift by 12.
10208  case 'I':
10209  if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
10210  break;
10211  return;
10212  case 'J': {
10213  uint64_t NVal = -C->getSExtValue();
10214  if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
10215  CVal = C->getSExtValue();
10216  break;
10217  }
10218  return;
10219  }
10220  // The K and L constraints apply *only* to logical immediates, including
10221  // what used to be the MOVI alias for ORR (though the MOVI alias has now
10222  // been removed and MOV should be used). So these constraints have to
10223  // distinguish between bit patterns that are valid 32-bit or 64-bit
10224  // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
10225  // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
10226  // versa.
10227  case 'K':
10228  if (AArch64_AM::isLogicalImmediate(CVal, 32))
10229  break;
10230  return;
10231  case 'L':
10232  if (AArch64_AM::isLogicalImmediate(CVal, 64))
10233  break;
10234  return;
10235  // The M and N constraints are a superset of K and L respectively, for use
10236  // with the MOV (immediate) alias. As well as the logical immediates they
10237  // also match 32 or 64-bit immediates that can be loaded either using a
10238  // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca
10239  // (M) or 64-bit 0x1234000000000000 (N) etc.
10240  // As a note some of this code is liberally stolen from the asm parser.
10241  case 'M': {
10242  if (!isUInt<32>(CVal))
10243  return;
10244  if (AArch64_AM::isLogicalImmediate(CVal, 32))
10245  break;
10246  if ((CVal & 0xFFFF) == CVal)
10247  break;
10248  if ((CVal & 0xFFFF0000ULL) == CVal)
10249  break;
10250  uint64_t NCVal = ~(uint32_t)CVal;
10251  if ((NCVal & 0xFFFFULL) == NCVal)
10252  break;
10253  if ((NCVal & 0xFFFF0000ULL) == NCVal)
10254  break;
10255  return;
10256  }
10257  case 'N': {
10258  if (AArch64_AM::isLogicalImmediate(CVal, 64))
10259  break;
10260  if ((CVal & 0xFFFFULL) == CVal)
10261  break;
10262  if ((CVal & 0xFFFF0000ULL) == CVal)
10263  break;
10264  if ((CVal & 0xFFFF00000000ULL) == CVal)
10265  break;
10266  if ((CVal & 0xFFFF000000000000ULL) == CVal)
10267  break;
10268  uint64_t NCVal = ~CVal;
10269  if ((NCVal & 0xFFFFULL) == NCVal)
10270  break;
10271  if ((NCVal & 0xFFFF0000ULL) == NCVal)
10272  break;
10273  if ((NCVal & 0xFFFF00000000ULL) == NCVal)
10274  break;
10275  if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
10276  break;
10277  return;
10278  }
10279  default:
10280  return;
10281  }
10282 
10283  // All assembler immediates are 64-bit integers.
10284  Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
10285  break;
10286  }
10287 
10288  if (Result.getNode()) {
10289  Ops.push_back(Result);
10290  return;
10291  }
10292 
10293  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
10294 }
10295 
10296 //===----------------------------------------------------------------------===//
10297 // AArch64 Advanced SIMD Support
10298 //===----------------------------------------------------------------------===//
10299 
10300 /// WidenVector - Given a value in the V64 register class, produce the
10301 /// equivalent value in the V128 register class.
10302 static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
10303  EVT VT = V64Reg.getValueType();
10304  unsigned NarrowSize = VT.getVectorNumElements();
10305  MVT EltTy = VT.getVectorElementType().getSimpleVT();
10306  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
10307  SDLoc DL(V64Reg);
10308 
10309  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
10310  V64Reg, DAG.getConstant(0, DL, MVT::i64));
10311 }
10312 
10313 /// getExtFactor - Determine the adjustment factor for the position when
10314 /// generating an "extract from vector registers" instruction.
10315 static unsigned getExtFactor(SDValue &V) {
10316  EVT EltType = V.getValueType().getVectorElementType();
10317  return EltType.getSizeInBits() / 8;
10318 }
10319 
10320 /// NarrowVector - Given a value in the V128 register class, produce the
10321 /// equivalent value in the V64 register class.
10322 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
10323  EVT VT = V128Reg.getValueType();
10324  unsigned WideSize = VT.getVectorNumElements();
10325  MVT EltTy = VT.getVectorElementType().getSimpleVT();
10326  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
10327  SDLoc DL(V128Reg);
10328 
10329  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
10330 }
10331 
10332 // Gather data to see if the operation can be modelled as a
10333 // shuffle in combination with VEXTs.
10335  SelectionDAG &DAG) const {
10336  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
10337  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
10338  SDLoc dl(Op);
10339  EVT VT = Op.getValueType();
10340  assert(!VT.isScalableVector() &&
10341  "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
10342  unsigned NumElts = VT.getVectorNumElements();
10343 
10344  struct ShuffleSourceInfo {
10345  SDValue Vec;
10346  unsigned MinElt;
10347  unsigned MaxElt;
10348 
10349  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
10350  // be compatible with the shuffle we intend to construct. As a result
10351  // ShuffleVec will be some sliding window into the original Vec.
10352  SDValue ShuffleVec;
10353 
10354  // Code should guarantee that element i in Vec starts at element "WindowBase
10355  // + i * WindowScale in ShuffleVec".
10356  int WindowBase;
10357  int WindowScale;
10358 
10359  ShuffleSourceInfo(SDValue Vec)
10360  : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
10361  ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
10362 
10363  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
10364  };
10365 
10366  // First gather all vectors used as an immediate source for this BUILD_VECTOR
10367  // node.
10369  for (unsigned i = 0; i < NumElts; ++i) {
10370  SDValue V = Op.getOperand(i);
10371  if (V.isUndef())
10372  continue;
10373  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
10374  !isa<ConstantSDNode>(V.getOperand(1)) ||
10376  LLVM_DEBUG(
10377  dbgs() << "Reshuffle failed: "
10378  "a shuffle can only come from building a vector from "
10379  "various elements of other fixed-width vectors, provided "
10380  "their indices are constant\n");
10381  return SDValue();
10382  }
10383 
10384  // Add this element source to the list if it's not already there.
10385  SDValue SourceVec = V.getOperand(0);
10386  auto Source = find(Sources, SourceVec);
10387  if (Source == Sources.end())
10388  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
10389 
10390  // Update the minimum and maximum lane number seen.
10391  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
10392  Source->MinElt = std::min(Source->MinElt, EltNo);
10393  Source->MaxElt = std::max(Source->MaxElt, EltNo);
10394  }
10395 
10396  // If we have 3 or 4 sources, try to generate a TBL, which will at least be
10397  // better than moving to/from gpr registers for larger vectors.
10398  if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
10399  // Construct a mask for the tbl. We may need to adjust the index for types
10400  // larger than i8.
10402  unsigned OutputFactor = VT.getScalarSizeInBits() / 8;
10403  for (unsigned I = 0; I < NumElts; ++I) {
10404  SDValue V = Op.getOperand(I);
10405  if (V.isUndef()) {
10406  for (unsigned OF = 0; OF < OutputFactor; OF++)
10407  Mask.push_back(-1);
10408  continue;
10409  }
10410  // Set the Mask lanes adjusted for the size of the input and output
10411  // lanes. The Mask is always i8, so it will set OutputFactor lanes per
10412  // output element, adjusted in their positions per input and output types.
10413  unsigned Lane = V.getConstantOperandVal(1);
10414  for (unsigned S = 0; S < Sources.size(); S++) {
10415  if (V.getOperand(0) == Sources[S].Vec) {
10416  unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
10417  unsigned InputBase = 16 * S + Lane * InputSize / 8;
10418  for (unsigned OF = 0; OF < OutputFactor; OF++)
10419  Mask.push_back(InputBase + OF);
10420  break;
10421  }
10422  }
10423  }
10424 
10425  // Construct the tbl3/tbl4 out of an intrinsic, the sources converted to
10426  // v16i8, and the TBLMask
10427  SmallVector<SDValue, 16> TBLOperands;
10428  TBLOperands.push_back(DAG.getConstant(Sources.size() == 3
10429  ? Intrinsic::aarch64_neon_tbl3
10430  : Intrinsic::aarch64_neon_tbl4,
10431  dl, MVT::i32));
10432  for (unsigned i = 0; i < Sources.size(); i++) {
10433  SDValue Src = Sources[i].Vec;
10434  EVT SrcVT = Src.getValueType();
10435  Src = DAG.getBitcast(SrcVT.is64BitVector() ? MVT::v8i8 : MVT::v16i8, Src);
10436  assert((SrcVT.is64BitVector() || SrcVT.is128BitVector()) &&
10437  "Expected a legally typed vector");
10438  if (SrcVT.is64BitVector())
10439  Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, Src,
10440  DAG.getUNDEF(MVT::v8i8));
10441  TBLOperands.push_back(Src);
10442  }
10443 
10444  SmallVector<SDValue, 16> TBLMask;
10445  for (unsigned i = 0; i < Mask.size(); i++)
10446  TBLMask.push_back(DAG.getConstant(Mask[i], dl, MVT::i32));
10447  assert((Mask.size() == 8 || Mask.size() == 16) &&
10448  "Expected a v8i8 or v16i8 Mask");
10449  TBLOperands.push_back(
10450  DAG.getBuildVector(Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
10451 
10452  SDValue Shuffle =
10454  Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
10455  return DAG.getBitcast(VT, Shuffle);
10456  }
10457 
10458  if (Sources.size() > 2) {
10459  LLVM_DEBUG(dbgs() << "Reshuffle failed: currently only do something "
10460  << "sensible when at most two source vectors are "
10461  << "involved\n");
10462  return SDValue();
10463  }
10464 
10465  // Find out the smallest element size among result and two sources, and use
10466  // it as element size to build the shuffle_vector.
10467  EVT SmallestEltTy = VT.getVectorElementType();
10468  for (auto &Source : Sources) {
10469  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
10470  if (SrcEltTy.bitsLT(SmallestEltTy)) {
10471  SmallestEltTy = SrcEltTy;
10472  }
10473  }
10474  unsigned ResMultiplier =
10475  VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
10476  uint64_t VTSize = VT.getFixedSizeInBits();
10477  NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
10478  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
10479 
10480  // If the source vector is too wide or too narrow, we may nevertheless be able
10481  // to construct a compatible shuffle either by concatenating it with UNDEF or
10482  // extracting a suitable range of elements.
10483  for (auto &Src : Sources) {
10484  EVT SrcVT = Src.ShuffleVec.getValueType();
10485 
10486  TypeSize SrcVTSize = SrcVT.getSizeInBits();
10487  if (SrcVTSize == TypeSize::Fixed(VTSize))
10488  continue;
10489 
10490  // This stage of the search produces a source with the same element type as
10491  // the original, but with a total width matching the BUILD_VECTOR output.
10492  EVT EltVT = SrcVT.getVectorElementType();
10493  unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
10494  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
10495 
10496  if (SrcVTSize.getFixedValue() < VTSize) {
10497  assert(2 * SrcVTSize == VTSize);
10498  // We can pad out the smaller vector for free, so if it's part of a
10499  // shuffle...
10500  Src.ShuffleVec =
10501  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
10502  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
10503  continue;
10504  }
10505 
10506  if (SrcVTSize.getFixedValue() != 2 * VTSize) {
10507  LLVM_DEBUG(
10508  dbgs() << "Reshuffle failed: result vector too small to extract\n");
10509  return SDValue();
10510  }
10511 
10512  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
10513  LLVM_DEBUG(
10514  dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
10515  return SDValue();
10516  }
10517 
10518  if (Src.MinElt >= NumSrcElts) {
10519  // The extraction can just take the second half
10520  Src.ShuffleVec =
10521  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
10522  DAG.getConstant(NumSrcElts, dl, MVT::i64));
10523  Src.WindowBase = -NumSrcElts;
10524  } else if (Src.MaxElt < NumSrcElts) {
10525  // The extraction can just take the first half
10526  Src.ShuffleVec =
10527  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
10528  DAG.getConstant(0, dl, MVT::i64));
10529  } else {
10530  // An actual VEXT is needed
10531  SDValue VEXTSrc1 =
10532  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
10533  DAG.getConstant(0, dl, MVT::i64));
10534  SDValue VEXTSrc2 =
10535  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
10536  DAG.getConstant(NumSrcElts, dl, MVT::i64));
10537  unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
10538 
10539  if (!SrcVT.is64BitVector()) {
10540  LLVM_DEBUG(
10541  dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
10542  "for SVE vectors.");
10543  return SDValue();
10544  }
10545 
10546  Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
10547  VEXTSrc2,
10548  DAG.getConstant(Imm, dl, MVT::i32));
10549  Src.WindowBase = -Src.MinElt;
10550  }
10551  }
10552 
10553  // Another possible incompatibility occurs from the vector element types. We
10554  // can fix this by bitcasting the source vectors to the same type we intend
10555  // for the shuffle.
10556  for (auto &Src : Sources) {
10557  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
10558  if (SrcEltTy == SmallestEltTy)
10559  continue;
10560  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
10561  Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
10562  Src.WindowScale =
10563  SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
10564  Src.WindowBase *= Src.WindowScale;
10565  }
10566 
10567  // Final check before we try to actually produce a shuffle.
10568  LLVM_DEBUG(for (auto Src
10569  : Sources)
10570  assert(Src.ShuffleVec.getValueType() == ShuffleVT););
10571 
10572  // The stars all align, our next step is to produce the mask for the shuffle.
10574  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
10575  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
10576  SDValue Entry = Op.getOperand(i);
10577  if (Entry.isUndef())
10578  continue;
10579 
10580  auto Src = find(Sources, Entry.getOperand(0));
10581  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
10582 
10583  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
10584  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
10585  // segment.
10586  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
10587  int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
10588  VT.getScalarSizeInBits());
10589  int LanesDefined = BitsDefined / BitsPerShuffleLane;
10590 
10591  // This source is expected to fill ResMultiplier lanes of the final shuffle,
10592  // starting at the appropriate offset.
10593  int *LaneMask = &Mask[i * ResMultiplier];
10594 
10595  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
10596  ExtractBase += NumElts * (Src - Sources.begin());
10597  for (int j = 0; j < LanesDefined; ++j)
10598  LaneMask[j] = ExtractBase + j;
10599  }
10600 
10601  // Final check before we try to produce nonsense...
10602  if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
10603  LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
10604  return SDValue();
10605  }
10606 
10607  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
10608  for (unsigned i = 0; i < Sources.size(); ++i)
10609  ShuffleOps[i] = Sources[i].ShuffleVec;
10610 
10611  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
10612  ShuffleOps[1], Mask);
10613  SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
10614 
10615  LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
10616  dbgs() << "Reshuffle, creating node: "; V.dump(););
10617 
10618  return V;
10619 }
10620 
10621 // check if an EXT instruction can handle the shuffle mask when the
10622 // vector sources of the shuffle are the same.
10623 static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
10624  unsigned NumElts = VT.getVectorNumElements();
10625 
10626  // Assume that the first shuffle index is not UNDEF. Fail if it is.
10627  if (M[0] < 0)
10628  return false;
10629 
10630  Imm = M[0];
10631 
10632  // If this is a VEXT shuffle, the immediate value is the index of the first
10633  // element. The other shuffle indices must be the successive elements after
10634  // the first one.
10635  unsigned ExpectedElt = Imm;
10636  for (unsigned i = 1; i < NumElts; ++i) {
10637  // Increment the expected index. If it wraps around, just follow it
10638  // back to index zero and keep going.
10639  ++ExpectedElt;
10640  if (ExpectedElt == NumElts)
10641  ExpectedElt = 0;
10642 
10643  if (M[i] < 0)
10644  continue; // ignore UNDEF indices
10645  if (ExpectedElt != static_cast<unsigned>(M[i]))
10646  return false;
10647  }
10648 
10649  return true;
10650 }
10651 
10652 // Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
10653 // v4i32s. This is really a truncate, which we can construct out of (legal)
10654 // concats and truncate nodes.
10656  if (V.getValueType() != MVT::v16i8)
10657  return SDValue();
10658  assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
10659 
10660  for (unsigned X = 0; X < 4; X++) {
10661  // Check the first item in each group is an extract from lane 0 of a v4i32
10662  // or v4i16.
10663  SDValue BaseExt = V.getOperand(X * 4);
10664  if (BaseExt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
10665  (BaseExt.getOperand(0).getValueType() != MVT::v4i16 &&
10666  BaseExt.getOperand(0).getValueType() != MVT::v4i32) ||
10667  !isa<ConstantSDNode>(BaseExt.getOperand(1)) ||
10668  BaseExt.getConstantOperandVal(1) != 0)
10669  return SDValue();
10670  SDValue Base = BaseExt.getOperand(0);
10671  // And check the other items are extracts from the same vector.
10672  for (unsigned Y = 1; Y < 4; Y++) {
10673  SDValue Ext = V.getOperand(X * 4 + Y);
10674  if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
10675  Ext.getOperand(0) != Base ||
10676  !isa<ConstantSDNode>(Ext.getOperand(1)) ||
10677  Ext.getConstantOperandVal(1) != Y)
10678  return SDValue();
10679  }
10680  }
10681 
10682  // Turn the buildvector into a series of truncates and concates, which will
10683  // become uzip1's. Any v4i32s we found get truncated to v4i16, which are
10684  // concat together to produce 2 v8i16. These are both truncated and concat
10685  // together.
10686  SDLoc DL(V);
10687  SDValue Trunc[4] = {
10688  V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
10689  V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
10690  for (SDValue &V : Trunc)
10691  if (V.getValueType() == MVT::v4i32)
10692  V = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i16, V);
10693  SDValue Concat0 =
10694  DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[0], Trunc[1]);
10695  SDValue Concat1 =
10696  DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[2], Trunc[3]);
10697  SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat0);
10698  SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat1);
10699  return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Trunc0, Trunc1);
10700 }
10701 
/// Check if a vector shuffle corresponds to a DUP instructions with a larger
/// element width than the vector lane type. If that is the case the function
/// returns true and writes the value of the DUP instruction lane operand into
/// DupLaneOp
static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
                          unsigned &DupLaneOp) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for wide DUP are: 16, 32, 64");

  // A wide DUP only makes sense when the block is strictly wider than a lane
  // and tiles both the lane size and the whole vector exactly.
  if (BlockSize <= VT.getScalarSizeInBits())
    return false;
  if (BlockSize % VT.getScalarSizeInBits() != 0)
    return false;
  if (VT.getSizeInBits() % BlockSize != 0)
    return false;

  size_t SingleVecNumElements = VT.getVectorNumElements();
  size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
  size_t NumBlocks = VT.getSizeInBits() / BlockSize;

  // We are looking for masks like
  // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
  // might be replaced by 'undefined'. BlockIndices will eventually contain
  // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
  // for the above examples)
  SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
  // Merge all blocks into BlockElts: position I must hold the same source
  // lane in every block (ignoring undefs), otherwise this is not a wide DUP.
  for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
    for (size_t I = 0; I < NumEltsPerBlock; I++) {
      int Elt = M[BlockIndex * NumEltsPerBlock + I];
      if (Elt < 0)
        continue;
      // For now we don't support shuffles that use the second operand
      if ((unsigned)Elt >= SingleVecNumElements)
        return false;
      if (BlockElts[I] < 0)
        BlockElts[I] = Elt;
      else if (BlockElts[I] != Elt)
        return false;
    }

  // We found a candidate block (possibly with some undefs). It must be a
  // sequence of consecutive integers starting with a value divisible by
  // NumEltsPerBlock with some values possibly replaced by undef-s.

  // Find first non-undef element
  auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
  assert(FirstRealEltIter != BlockElts.end() &&
         "Shuffle with all-undefs must have been caught by previous cases, "
         "e.g. isSplat()");
  // Defensive fallback for release builds (the assert above is compiled out):
  // treat an all-undef block as a DUP of lane 0.
  if (FirstRealEltIter == BlockElts.end()) {
    DupLaneOp = 0;
    return true;
  }

  // Index of FirstRealElt in BlockElts
  size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();

  // Reject if inferring BlockElts[0] would underflow below lane 0.
  if ((unsigned)*FirstRealEltIter < FirstRealIndex)
    return false;
  // BlockElts[0] must have the following value if it isn't undef:
  size_t Elt0 = *FirstRealEltIter - FirstRealIndex;

  // Check the first element
  if (Elt0 % NumEltsPerBlock != 0)
    return false;
  // Check that the sequence indeed consists of consecutive integers (modulo
  // undefs)
  for (size_t I = 0; I < NumEltsPerBlock; I++)
    if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
      return false;

  // The DUP lane operand is the block number being duplicated.
  DupLaneOp = Elt0 / NumEltsPerBlock;
  return true;
}
10776 
10777 // check if an EXT instruction can handle the shuffle mask when the
10778 // vector sources of the shuffle are different.
10779 static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
10780  unsigned &Imm) {
10781  // Look for the first non-undef element.
10782  const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
10783 
10784  // Benefit form APInt to handle overflow when calculating expected element.
10785  unsigned NumElts = VT.getVectorNumElements();
10786  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
10787  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
10788  // The following shuffle indices must be the successive elements after the
10789  // first real element.
10790  bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](int Elt) {
10791  return Elt != ExpectedElt++ && Elt != -1;
10792  });
10793  if (FoundWrongElt)
10794  return false;
10795 
10796  // The index of an EXT is the first element if it is not UNDEF.
10797  // Watch out for the beginning UNDEFs. The EXT index should be the expected
10798  // value of the first element. E.g.
10799  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
10800  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
10801  // ExpectedElt is the last mask index plus 1.
10802  Imm = ExpectedElt.getZExtValue();
10803 
10804  // There are two difference cases requiring to reverse input vectors.
10805  // For example, for vector <4 x i32> we have the following cases,
10806  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
10807  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
10808  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
10809  // to reverse two input vectors.
10810  if (Imm < NumElts)
10811  ReverseEXT = true;
10812  else
10813  Imm -= NumElts;
10814 
10815  return true;
10816 }
10817 
10818 /// isREVMask - Check if a vector shuffle corresponds to a REV
10819 /// instruction with the specified blocksize. (The order of the elements
10820 /// within each block of the vector is reversed.)
10821 static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
10822  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64 ||
10823  BlockSize == 128) &&
10824  "Only possible block sizes for REV are: 16, 32, 64, 128");
10825 
10826  unsigned EltSz = VT.getScalarSizeInBits();
10827  unsigned NumElts = VT.getVectorNumElements();
10828  unsigned BlockElts = M[0] + 1;
10829  // If the first shuffle index is UNDEF, be optimistic.
10830  if (M[0] < 0)
10831  BlockElts = BlockSize / EltSz;
10832 
10833  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
10834  return false;
10835 
10836  for (unsigned i = 0; i < NumElts; ++i) {
10837  if (M[i] < 0)
10838  continue; // ignore UNDEF indices
10839  if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
10840  return false;
10841  }
10842 
10843  return true;
10844 }
10845 
10846 static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
10847  unsigned NumElts = VT.getVectorNumElements();
10848  if (NumElts % 2 != 0)
10849  return false;
10850  WhichResult = (M[0] == 0 ? 0 : 1);
10851  unsigned Idx = WhichResult * NumElts / 2;
10852  for (unsigned i = 0; i != NumElts; i += 2) {
10853  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
10854  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
10855  return false;
10856  Idx += 1;
10857  }
10858 
10859  return true;
10860 }
10861 
10862 static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
10863  unsigned NumElts = VT.getVectorNumElements();
10864  WhichResult = (M[0] == 0 ? 0 : 1);
10865  for (unsigned i = 0; i != NumElts; ++i) {
10866  if (M[i] < 0)
10867  continue; // ignore UNDEF indices
10868  if ((unsigned)M[i] != 2 * i + WhichResult)
10869  return false;
10870  }
10871 
10872  return true;
10873 }
10874 
10875 static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
10876  unsigned NumElts = VT.getVectorNumElements();
10877  if (NumElts % 2 != 0)
10878  return false;
10879  WhichResult = (M[0] == 0 ? 0 : 1);
10880  for (unsigned i = 0; i < NumElts; i += 2) {
10881  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
10882  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
10883  return false;
10884  }
10885  return true;
10886 }
10887 
10888 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
10889 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
10890 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
10891 static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
10892  unsigned NumElts = VT.getVectorNumElements();
10893  if (NumElts % 2 != 0)
10894  return false;
10895  WhichResult = (M[0] == 0 ? 0 : 1);
10896  unsigned Idx = WhichResult * NumElts / 2;
10897  for (unsigned i = 0; i != NumElts; i += 2) {
10898  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
10899  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
10900  return false;
10901  Idx += 1;
10902  }
10903 
10904  return true;
10905 }
10906 
10907 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
10908 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
10909 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
10910 static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
10911  unsigned Half = VT.getVectorNumElements() / 2;
10912  WhichResult = (M[0] == 0 ? 0 : 1);
10913  for (unsigned j = 0; j != 2; ++j) {
10914  unsigned Idx = WhichResult;
10915  for (unsigned i = 0; i != Half; ++i) {
10916  int MIdx = M[i + j * Half];
10917  if (MIdx >= 0 && (unsigned)MIdx != Idx)
10918  return false;
10919  Idx += 2;
10920  }
10921  }
10922 
10923  return true;
10924 }
10925 
10926 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
10927 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
10928 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
10929 static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
10930  unsigned NumElts = VT.getVectorNumElements();
10931  if (NumElts % 2 != 0)
10932  return false;
10933  WhichResult = (M[0] == 0 ? 0 : 1);
10934  for (unsigned i = 0; i < NumElts; i += 2) {
10935  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
10936  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
10937  return false;
10938  }
10939  return true;
10940 }
10941 
10942 static bool isINSMask(ArrayRef<int> M, int NumInputElements,
10943  bool &DstIsLeft, int &Anomaly) {
10944  if (M.size() != static_cast<size_t>(NumInputElements))
10945  return false;
10946 
10947  int NumLHSMatch = 0, NumRHSMatch = 0;
10948  int LastLHSMismatch = -1, LastRHSMismatch = -1;
10949 
10950  for (int i = 0; i < NumInputElements; ++i) {
10951  if (M[i] == -1) {
10952  ++NumLHSMatch;
10953  ++NumRHSMatch;
10954  continue;
10955  }
10956 
10957  if (M[i] == i)
10958  ++NumLHSMatch;
10959  else
10960  LastLHSMismatch = i;
10961 
10962  if (M[i] == i + NumInputElements)
10963  ++NumRHSMatch;
10964  else
10965  LastRHSMismatch = i;
10966  }
10967 
10968  if (NumLHSMatch == NumInputElements - 1) {
10969  DstIsLeft = true;
10970  Anomaly = LastLHSMismatch;
10971  return true;
10972  } else if (NumRHSMatch == NumInputElements - 1) {
10973  DstIsLeft = false;
10974  Anomaly = LastRHSMismatch;
10975  return true;
10976  }
10977 
10978  return false;
10979 }
10980 
10981 static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
10982  if (VT.getSizeInBits() != 128)
10983  return false;
10984 
10985  unsigned NumElts = VT.getVectorNumElements();
10986 
10987  for (int I = 0, E = NumElts / 2; I != E; I++) {
10988  if (Mask[I] != I)
10989  return false;
10990  }
10991 
10992  int Offset = NumElts / 2;
10993  for (int I = NumElts / 2, E = NumElts; I != E; I++) {
10994  if (Mask[I] != I + SplitLHS * Offset)
10995  return false;
10996  }
10997 
10998  return true;
10999 }
11000 
  // Body of the helper that lowers a shuffle forming a plain concatenation of
  // its operands' low halves into a CONCAT_VECTORS node.
  // NOTE(review): the doxygen extraction dropped the function signature and
  // a guard condition in this block; the remaining code is kept verbatim.
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue V0 = Op.getOperand(0);
  SDValue V1 = Op.getOperand(1);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();

  // NOTE(review): the condition guarding this early-out was dropped by the
  // extraction; only its "return SDValue();" survives below.
    return SDValue();

  // V0 must be narrowed below iff it is already a full 128-bit vector.
  bool SplitV0 = V0.getValueSizeInBits() == 128;

  // Bail out unless the mask really is <low half of V0, low half of V1>.
  if (!isConcatMask(Mask, VT, SplitV0))
    return SDValue();

  // Work with the half-width type and extract the low halves of any
  // 128-bit inputs.
  EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
  if (SplitV0) {
    V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
                     DAG.getConstant(0, DL, MVT::i64));
  }
  if (V1.getValueSizeInBits() == 128) {
    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
                     DAG.getConstant(0, DL, MVT::i64));
  }
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
11028 
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle. ID is the perfect-shuffle
/// ID, V1 and V2 are the original shuffle inputs. PFEntry is the perfect
/// shuffle table entry and LHS/RHS are the immediate inputs for this stage of
/// the shuffle.
// NOTE(review): the doxygen extraction dropped the first line of this
// signature; the parameter list below continues it.
                                      SDValue V2, unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  // PFEntry encoding: bits [29:26] = opcode, [25:13] = LHS sub-shuffle ID,
  // [12:0] = RHS sub-shuffle ID (or, for OP_MOVLANE, the lane to move into).
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);

  enum {
    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VREV,
    OP_VDUP0,
    OP_VDUP1,
    OP_VDUP2,
    OP_VDUP3,
    OP_VEXT1,
    OP_VEXT2,
    OP_VEXT3,
    OP_VUZPL, // VUZP, left result
    OP_VUZPR, // VUZP, right result
    OP_VZIPL, // VZIP, left result
    OP_VZIPR, // VZIP, right result
    OP_VTRNL, // VTRN, left result
    OP_VTRNR, // VTRN, right result
    OP_MOVLANE // Move lane. RHSID is the lane to move into
  };

  if (OpNum == OP_COPY) {
    // Sub-shuffle IDs encode four lane digits in base 9; <0,1,2,3> is the
    // identity of LHS and <4,5,6,7> the identity of RHS.
    if (LHSID == (1 * 9 + 2) * 9 + 3)
      return LHS;
    assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
    return RHS;
  }

  if (OpNum == OP_MOVLANE) {
    // Decompose a PerfectShuffle ID to get the Mask for lane Elt
    auto getPFIDLane = [](unsigned ID, int Elt) -> int {
      assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
      Elt = 3 - Elt;
      while (Elt > 0) {
        ID /= 9;
        Elt--;
      }
      // Digit 8 encodes an undef lane.
      return (ID % 9 == 8) ? -1 : ID % 9;
    };

    // For OP_MOVLANE shuffles, the RHSID represents the lane to move into. We
    // get the lane to move from from the PFID, which is always from the
    // original vectors (V1 or V2).
    // NOTE(review): the extraction dropped the first line of this recursive
    // call (the "SDValue OpLHS = GeneratePerfectShuffle(" part).
        LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
    EVT VT = OpLHS.getValueType();
    assert(RHSID < 8 && "Expected a lane index for RHSID!");
    unsigned ExtLane = 0;
    SDValue Input;

    // OP_MOVLANE are either D movs (if bit 0x4 is set) or S movs. D movs
    // convert into a higher type.
    if (RHSID & 0x4) {
      int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
      if (MaskElt == -1)
        MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
      assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
      ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
      Input = MaskElt < 2 ? V1 : V2;
      if (VT.getScalarSizeInBits() == 16) {
        Input = DAG.getBitcast(MVT::v2f32, Input);
        OpLHS = DAG.getBitcast(MVT::v2f32, OpLHS);
      } else {
        assert(VT.getScalarSizeInBits() == 32 &&
               "Expected 16 or 32 bit shuffle elemements");
        Input = DAG.getBitcast(MVT::v2f64, Input);
        OpLHS = DAG.getBitcast(MVT::v2f64, OpLHS);
      }
    } else {
      int MaskElt = getPFIDLane(ID, RHSID);
      assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
      ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
      Input = MaskElt < 4 ? V1 : V2;
      // Be careful about creating illegal types. Use f16 instead of i16.
      if (VT == MVT::v4i16) {
        Input = DAG.getBitcast(MVT::v4f16, Input);
        OpLHS = DAG.getBitcast(MVT::v4f16, OpLHS);
      }
    }
    // NOTE(review): the extraction dropped the start of this element-extract
    // node creation ("SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, ...").
        Input, DAG.getVectorIdxConstant(ExtLane, dl));
    SDValue Ins =
        DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Input.getValueType(), OpLHS,
                    Ext, DAG.getVectorIdxConstant(RHSID & 0x3, dl));
    return DAG.getBitcast(VT, Ins);
  }

  // Recursively materialize both sub-shuffles, then combine them with the
  // operation selected by OpNum.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
                                 RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
                                 RHS, DAG, dl);
  EVT VT = OpLHS.getValueType();

  switch (OpNum) {
  default:
    llvm_unreachable("Unknown shuffle opcode!");
  case OP_VREV:
    // VREV divides the vector in half and swaps within the half.
    // NOTE(review): several "VT.getVectorElementType() == MVT::..." condition
    // lines in this case were dropped by the extraction.
    if (VT.getVectorElementType() == MVT::i32 ||
      return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
    // vrev <4 x i16> -> REV32
    if (VT.getVectorElementType() == MVT::i16 ||
        VT.getVectorElementType() == MVT::f16 ||
      return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
    // vrev <4 x i8> -> REV16
      return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
  case OP_VDUP0:
  case OP_VDUP1:
  case OP_VDUP2:
  case OP_VDUP3: {
    // Select the DUPLANE variant matching the element width.
    EVT EltTy = VT.getVectorElementType();
    unsigned Opcode;
    if (EltTy == MVT::i8)
      Opcode = AArch64ISD::DUPLANE8;
    else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
      Opcode = AArch64ISD::DUPLANE16;
    else if (EltTy == MVT::i32 || EltTy == MVT::f32)
      Opcode = AArch64ISD::DUPLANE32;
    else if (EltTy == MVT::i64 || EltTy == MVT::f64)
      Opcode = AArch64ISD::DUPLANE64;
    else
      llvm_unreachable("Invalid vector element type?");

    // DUPLANE takes a 128-bit source; widen a 64-bit operand first.
    if (VT.getSizeInBits() == 64)
      OpLHS = WidenVector(OpLHS, DAG);
    SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
    return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
  }
  case OP_VEXT1:
  case OP_VEXT2:
  case OP_VEXT3: {
    // The EXT immediate is in bytes: scale the lane count by element size.
    unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
    return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
                       DAG.getConstant(Imm, dl, MVT::i32));
  }
  case OP_VUZPL:
    return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VUZPR:
    return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VZIPL:
    return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VZIPR:
    return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VTRNL:
    return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VTRNR:
    return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  }
}
11200 
// Lower a VECTOR_SHUFFLE to a NEON TBL1/TBL2 intrinsic (called with
// (Op, ShuffleMask, DAG) — see the call site in LowerVECTOR_SHUFFLE).
// NOTE(review): the first line of the signature was dropped by the
// extraction; the parameter list below continues it.
                            SelectionDAG &DAG) {
  // Check to see if we can use the TBL instruction.
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);

  EVT EltVT = Op.getValueType().getVectorElementType();
  unsigned BytesPerElt = EltVT.getSizeInBits() / 8;

  // If only the second source is meaningful, swap the operands so the live
  // source is V1, and remember the swap so mask indices can be adjusted.
  bool Swap = false;
  if (V1.isUndef() || isZerosVector(V1.getNode())) {
    std::swap(V1, V2);
    Swap = true;
  }

  // If the V2 source is undef or zero then we can use a tbl1, as tbl1 will fill
  // out of range values with 0s. We do need to make sure that any out-of-range
  // values are really out-of-range for a v16i8 vector.
  bool IsUndefOrZero = V2.isUndef() || isZerosVector(V2.getNode());
  MVT IndexVT = MVT::v8i8;
  unsigned IndexLen = 8;
  if (Op.getValueSizeInBits() == 128) {
    IndexVT = MVT::v16i8;
    IndexLen = 16;
  }

  // Expand the element-level shuffle mask into a byte-level TBL mask.
  SmallVector<SDValue, 8> TBLMask;
  for (int Val : ShuffleMask) {
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      if (Swap)
        Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
      if (IsUndefOrZero && Offset >= IndexLen)
        Offset = 255; // force out-of-range so TBL produces 0 for this byte
      TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
    }
  }

  SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
  SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);

  SDValue Shuffle;
  if (IsUndefOrZero) {
    // Single-source case: duplicate the 64-bit source to fill the table.
    if (IndexLen == 8)
      V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
    Shuffle = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
        DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
        DAG.getBuildVector(IndexVT, DL, ArrayRef(TBLMask.data(), IndexLen)));
  } else {
    if (IndexLen == 8) {
      // Two 64-bit sources fit in one 128-bit table register.
      V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
      Shuffle = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
          DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
          DAG.getBuildVector(IndexVT, DL, ArrayRef(TBLMask.data(), IndexLen)));
    } else {
      // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
      // cannot currently represent the register constraints on the input
      // table registers.
      // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
      //                       DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
      //                       IndexLen));
      Shuffle = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
          DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
          V2Cst,
          DAG.getBuildVector(IndexVT, DL, ArrayRef(TBLMask.data(), IndexLen)));
    }
  }
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
}
11274 
11275 static unsigned getDUPLANEOp(EVT EltType) {
11276  if (EltType == MVT::i8)
11277  return AArch64ISD::DUPLANE8;
11278  if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
11279  return AArch64ISD::DUPLANE16;
11280  if (EltType == MVT::i32 || EltType == MVT::f32)
11281  return AArch64ISD::DUPLANE32;
11282  if (EltType == MVT::i64 || EltType == MVT::f64)
11283  return AArch64ISD::DUPLANE64;
11284 
11285  llvm_unreachable("Invalid vector element type?");
11286 }
11287 
// Build a lane-duplicate node (Opcode, e.g. a DUPLANE variant) of type VT
// that splats element Lane of V, first looking through bitcasts, subvector
// extracts and concats to find a better 128-bit source for the lane.
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
                            unsigned Opcode, SelectionDAG &DAG) {
  // Try to eliminate a bitcasted extract subvector before a DUPLANE.
  auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
    // Match: dup (bitcast (extract_subv X, C)), LaneC
    // NOTE(review): the second line of this condition (presumably checking
    // the bitcast's operand opcode) was dropped by the extraction.
    if (BitCast.getOpcode() != ISD::BITCAST ||
      return false;

    // The extract index must align in the destination type. That may not
    // happen if the bitcast is from narrow to wide type.
    SDValue Extract = BitCast.getOperand(0);
    unsigned ExtIdx = Extract.getConstantOperandVal(1);
    unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
    unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
    unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
    if (ExtIdxInBits % CastedEltBitWidth != 0)
      return false;

    // Can't handle cases where vector size is not 128-bit
    if (!Extract.getOperand(0).getValueType().is128BitVector())
      return false;

    // Update the lane value by offsetting with the scaled extract index.
    LaneC += ExtIdxInBits / CastedEltBitWidth;

    // Determine the casted vector type of the wide vector input.
    // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
    // Examples:
    // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
    // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
    unsigned SrcVecNumElts =
        Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
    CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
                              SrcVecNumElts);
    return true;
  };
  MVT CastVT;
  if (getScaledOffsetDup(V, Lane, CastVT)) {
    // Dup directly from the wide pre-extract vector at the adjusted lane.
    V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
  // NOTE(review): the second line of this condition (presumably requiring a
  // constant extract index) was dropped by the extraction.
  } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
    // The lane is incremented by the index of the extract.
    // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
    Lane += V.getConstantOperandVal(1);
    V = V.getOperand(0);
  } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
    // The lane is decremented if we are splatting from the 2nd operand.
    // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
    unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
    Lane -= Idx * VT.getVectorNumElements() / 2;
    V = WidenVector(V.getOperand(Idx), DAG);
  } else if (VT.getSizeInBits() == 64) {
    // Widen the operand to 128-bit register with undef.
    V = WidenVector(V, DAG);
  }
  return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}
11346 
// Return true if we can get a new shuffle mask by checking the parameter mask
// array to test whether every two adjacent mask values are continuous and
// starting from an even number. On success, NewMask holds the half-length
// mask over elements of twice the width.
// NOTE(review): the first line of this signature was dropped by the
// extraction; the parameter list below continues it.
                          SmallVectorImpl<int> &NewMask) {
  unsigned NumElts = VT.getVectorNumElements();
  // An odd element count cannot be paired into wider elements.
  if (NumElts % 2 != 0)
    return false;

  NewMask.clear();
  for (unsigned i = 0; i < NumElts; i += 2) {
    int M0 = M[i];
    int M1 = M[i + 1];

    // If both elements are undef, new mask is undef too.
    if (M0 == -1 && M1 == -1) {
      NewMask.push_back(-1);
      continue;
    }

    // Only the odd element is defined: it must be the high half of a wide
    // element (odd-valued index).
    if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
      NewMask.push_back(M1 / 2);
      continue;
    }

    // The even element is defined and even-valued; the odd element must be
    // its successor or undef.
    if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
      NewMask.push_back(M0 / 2);
      continue;
    }

    // This pair cannot be expressed as a single wide element.
    NewMask.clear();
    return false;
  }

  assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
  return true;
}
11384 
// Try to widen element type to get a new mask value for a better permutation
// sequence, so that we can use NEON shuffle instructions, such as zip1/2,
// UZP1/2, TRN1/2, REV, INS, etc.
// For example:
//   shufflevector <4 x i32> %a, <4 x i32> %b,
//                 <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
//   shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// Finally, we can get:
//   mov v0.d[0], v1.d[1]
// NOTE(review): the function signature was dropped by the extraction; the
// body below is kept verbatim.
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  EVT ScalarVT = VT.getVectorElementType();
  unsigned ElementSize = ScalarVT.getFixedSizeInBits();
  SDValue V0 = Op.getOperand(0);
  SDValue V1 = Op.getOperand(1);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();

  // If combining adjacent elements, like two i16's -> i32, two i32's -> i64 ...
  // We need to make sure the wider element type is legal. Thus, ElementSize
  // should be not larger than 32 bits, and i1 type should also be excluded.
  if (ElementSize > 32 || ElementSize == 1)
    return SDValue();

  SmallVector<int, 8> NewMask;
  if (isWideTypeMask(Mask, VT, NewMask)) {
    // Re-express the shuffle over a vector with half the elements of twice
    // the width, preserving int/float-ness of the element type.
    MVT NewEltVT = VT.isFloatingPoint()
                       ? MVT::getFloatingPointVT(ElementSize * 2)
                       : MVT::getIntegerVT(ElementSize * 2);
    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      V0 = DAG.getBitcast(NewVT, V0);
      V1 = DAG.getBitcast(NewVT, V1);
      return DAG.getBitcast(VT,
                            DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
    }
  }

  return SDValue();
}
11426 
// Try to fold shuffle (tbl2, tbl2) into a single tbl4.
// NOTE(review): the first line of this signature was dropped by the
// extraction; the parameter list below continues it.
                                               ArrayRef<int> ShuffleMask,
                                               SelectionDAG &DAG) {
  SDValue Tbl1 = Op->getOperand(0);
  SDValue Tbl2 = Op->getOperand(1);
  SDLoc dl(Op);
  SDValue Tbl2ID =
      DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl2, dl, MVT::i64);

  // Both shuffle operands must be tbl2 intrinsic calls.
  EVT VT = Op.getValueType();
  if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
      Tbl1->getOperand(0) != Tbl2ID ||
      Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
      Tbl2->getOperand(0) != Tbl2ID)
    return SDValue();

  if (Tbl1->getValueType(0) != MVT::v16i8 ||
      Tbl2->getValueType(0) != MVT::v16i8)
    return SDValue();

  // Combine the two tbl2 masks through the shuffle mask: bytes selected from
  // the second tbl2's result must be re-based past the first tbl's 32 table
  // bytes, which requires their mask entries to be constants.
  SDValue Mask1 = Tbl1->getOperand(3);
  SDValue Mask2 = Tbl2->getOperand(3);
  SmallVector<SDValue, 16> TBLMaskParts(16, SDValue());
  for (unsigned I = 0; I < 16; I++) {
    if (ShuffleMask[I] < 16)
      TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
    else {
      auto *C =
          dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
      if (!C)
        return SDValue();
      TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);
    }
  }

  // Emit a tbl4 over both tbl2s' table registers with the combined mask.
  SDValue TBLMask = DAG.getBuildVector(VT, dl, TBLMaskParts);
  SDValue ID =
      DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl4, dl, MVT::i64);

  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
                     {ID, Tbl1->getOperand(1), Tbl1->getOperand(2),
                      Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
}
11471 
11472 // Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
11473 // but we don't have an appropriate instruction,
11474 // so custom-lower it as ZIP1-with-zeros.
11475 SDValue
11476 AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
11477  SelectionDAG &DAG) const {
11478  SDLoc dl(Op);
11479  EVT VT = Op.getValueType();
11480  SDValue SrcOp = Op.getOperand(0);
11481  EVT SrcVT = SrcOp.getValueType();
11482  assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
11483  "Unexpected extension factor.");
11484  unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
11485  // FIXME: support multi-step zipping?
11486  if (Scale != 2)
11487  return SDValue();
11488  SDValue Zeros = DAG.getConstant(0, dl, SrcVT);
11489  return DAG.getBitcast(VT,
11490  DAG.getNode(AArch64ISD::ZIP1, dl, SrcVT, SrcOp, Zeros));
11491 }
11492 
11493 SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
11494  SelectionDAG &DAG) const {
11495  SDLoc dl(Op);
11496  EVT VT = Op.getValueType();
11497 
11498  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
11499 
11501  Subtarget->forceStreamingCompatibleSVE()))
11502  return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
11503 
11504  // Convert shuffles that are directly supported on NEON to target-specific
11505  // DAG nodes, instead of keeping them as shuffles and matching them again
11506  // during code selection. This is more efficient and avoids the possibility
11507  // of inconsistencies between legalization and selection.
11508  ArrayRef<int> ShuffleMask = SVN->getMask();
11509 
11510  SDValue V1 = Op.getOperand(0);
11511  SDValue V2 = Op.getOperand(1);
11512 
11513  assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
11514  assert(ShuffleMask.size() == VT.getVectorNumElements() &&
11515  "Unexpected VECTOR_SHUFFLE mask size!");
11516 
11517  if (SDValue Res = tryToConvertShuffleOfTbl2ToTbl4(Op, ShuffleMask, DAG))
11518  return Res;
11519 
11520  if (SVN->isSplat()) {
11521  int Lane = SVN->getSplatIndex();
11522  // If this is undef splat, generate it via "just" vdup, if possible.
11523  if (Lane == -1)
11524  Lane = 0;
11525 
11526  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
11527  return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
11528  V1.getOperand(0));
11529  // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
11530  // constant. If so, we can just reference the lane's definition directly.
11531  if (V1.getOpcode() == ISD::BUILD_VECTOR &&
11532  !isa<ConstantSDNode>(V1.getOperand(Lane)))
11533  return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
11534 
11535  // Otherwise, duplicate from the lane of the input vector.
11536  unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
11537  return constructDup(V1, Lane, dl, VT, Opcode, DAG);
11538  }
11539 
11540  // Check if the mask matches a DUP for a wider element
11541  for (unsigned LaneSize : {64U, 32U, 16U}) {
11542  unsigned Lane = 0;
11543  if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
11544  unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
11545  : LaneSize == 32 ? AArch64ISD::DUPLANE32
11547  // Cast V1 to an integer vector with required lane size
11548  MVT NewEltTy = MVT::getIntegerVT(LaneSize);
11549  unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
11550  MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
11551  V1 = DAG.getBitcast(NewVecTy, V1);
11552  // Constuct the DUP instruction
11553  V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
11554  // Cast back to the original type
11555  return DAG.getBitcast(VT, V1);
11556  }
11557  }
11558 
11559  if (isREVMask(ShuffleMask, VT, 64))
11560  return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
11561  if (isREVMask(ShuffleMask, VT, 32))
11562  return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
11563  if (isREVMask(ShuffleMask, VT, 16))
11564  return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
11565 
11566  if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
11567  (VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
11568  ShuffleVectorInst::isReverseMask(ShuffleMask)) {
11569  SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
11570  return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
11571  DAG.getConstant(8, dl, MVT::i32));
11572  }
11573 
11574  bool ReverseEXT = false;
11575  unsigned Imm;
11576  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
11577  if (ReverseEXT)
11578  std::swap(V1, V2);
11579  Imm *= getExtFactor(V1);
11580  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
11581  DAG.getConstant(Imm, dl, MVT::i32));
11582  } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
11583  Imm *= getExtFactor(V1);
11584  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
11585  DAG.getConstant(Imm, dl, MVT::i32));
11586  }
11587 
11588  unsigned WhichResult;
11589  if (isZIPMask(ShuffleMask, VT, WhichResult)) {
11590  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
11591  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
11592  }
11593  if (isUZPMask(ShuffleMask, VT, WhichResult)) {
11594  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
11595  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
11596  }
11597  if (isTRNMask(ShuffleMask, VT, WhichResult)) {
11598  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
11599  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
11600  }
11601 
11602  if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
11603  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
11604  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
11605  }
11606  if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
11607  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
11608  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
11609  }
11610  if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
11611  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
11612  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
11613  }
11614 
11616  return Concat;
11617 
11618  bool DstIsLeft;
11619  int Anomaly;
11620  int NumInputElements = V1.getValueType().getVectorNumElements();
11621  if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
11622  SDValue DstVec = DstIsLeft ? V1 : V2;
11623  SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
11624 
11625  SDValue SrcVec = V1;
11626  int SrcLane = ShuffleMask[Anomaly];
11627  if (SrcLane >= NumInputElements) {
11628  SrcVec = V2;
11629  SrcLane -= VT.getVectorNumElements();
11630  }
11631  SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
11632 
11633  EVT ScalarVT = VT.getVectorElementType();
11634 
11635  if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
11636  ScalarVT = MVT::i32;
11637 
11638  return DAG.getNode(
11639  ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
11640  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
11641  DstLaneV);
11642  }
11643 
11644  if (SDValue NewSD = tryWidenMaskForShuffle(Op, DAG))
11645  return NewSD;
11646 
11647  // If the shuffle is not directly supported and it has 4 elements, use
11648  // the PerfectShuffle-generated table to synthesize it from other shuffles.
11649  unsigned NumElts = VT.getVectorNumElements();
11650  if (NumElts == 4) {
11651  unsigned PFIndexes[4];
11652  for (unsigned i = 0; i != 4; ++i) {
11653  if (ShuffleMask[i] < 0)
11654  PFIndexes[i] = 8;
11655  else
11656  PFIndexes[i] = ShuffleMask[i];
11657  }
11658 
11659  // Compute the index in the perfect shuffle table.
11660  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
11661  PFIndexes[2] * 9 + PFIndexes[3];
11662  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
11663  return GeneratePerfectShuffle(PFTableIndex, V1, V2, PFEntry, V1, V2, DAG,
11664  dl);
11665  }
11666 
11667  return GenerateTBL(Op, ShuffleMask, DAG);
11668 }
11669 
// Lower ISD::SPLAT_VECTOR. Constant splats are left for instruction
// selection; the remaining (scalable i1) case is synthesised with the SVE
// `whilelo` intrinsic: the splat value is sign-extended from bit 0 of the
// scalar operand, so whilelo(0, 0) gives an all-false predicate and
// whilelo(0, -1) gives an all-true one.
// NOTE(review): this text is a doxygen extraction; original lines 11674,
// 11678 and 11695-11696 (the fixed-length-SVE guard condition, an assert,
// and the nxv1i1 EXTRACT_SUBVECTOR arm) were dropped by the extraction -
// consult the upstream file before editing this function.
11670 SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
11671  SelectionDAG &DAG) const {
11672  EVT VT = Op.getValueType();
11673 
// Fixed-length vectors that must use SVE are diverted to the scalable path.
11675  Subtarget->forceStreamingCompatibleSVE()))
11676  return LowerToScalableOp(Op, DAG);
11677 
11679  "Unexpected vector type!");
11680 
11681  // We can handle the constant cases during isel.
11682  if (isa<ConstantSDNode>(Op.getOperand(0)))
11683  return Op;
11684 
11685  // There isn't a natural way to handle the general i1 case, so we use some
11686  // trickery with whilelo.
11687  SDLoc DL(Op);
// Normalise the splat scalar to i64 and replicate bit 0 across it, so the
// value is exactly 0 or -1 when handed to whilelo.
11688  SDValue SplatVal = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, MVT::i64);
11689  SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, SplatVal,
11690  DAG.getValueType(MVT::i1));
11691  SDValue ID =
11692  DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
11693  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
// nxv1i1 has no direct whilelo form; presumably the elided lines build the
// predicate at a wider type and extract a subvector - TODO confirm upstream.
11694  if (VT == MVT::nxv1i1)
11697  Zero, SplatVal),
11698  Zero);
11699  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ID, Zero, SplatVal);
11700 }
11701 
// Lower the aarch64.sve.dupq.lane intrinsic: replicate one 128-bit quadword
// of an SVE register across the whole vector. Constant indices 0..3 map
// directly onto the DUPLANE128 node; other (variable or larger) indices are
// emulated with a TBL over the data reinterpreted as nxv2i64.
// NOTE(review): original line 11711 (the SVE-ACLE type check guarding the
// early return at 11712) was dropped by the doxygen extraction.
11702 SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
11703  SelectionDAG &DAG) const {
11704  SDLoc DL(Op);
11705 
11706  EVT VT = Op.getValueType();
11707  if (!isTypeLegal(VT) || !VT.isScalableVector())
11708  return SDValue();
11709 
11710  // Current lowering only supports the SVE-ACLE types.
11712  return SDValue();
11713 
11714  // The DUPQ operation is indepedent of element type so normalise to i64s.
11715  SDValue Idx128 = Op.getOperand(2);
11716 
11717  // DUPQ can be used when idx is in range.
11718  auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
11719  if (CIdx && (CIdx->getZExtValue() <= 3)) {
11720  SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
11721  return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
11722  }
11723 
// Fallback: view the data as pairs of 64-bit lanes and gather them by index.
11724  SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
11725 
11726  // The ACLE says this must produce the same result as:
11727  // svtbl(data, svadd_x(svptrue_b64(),
11728  // svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
11729  // index * 2))
11730  SDValue One = DAG.getConstant(1, DL, MVT::i64);
11731  SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
11732 
11733  // create the vector 0,1,0,1,...
11734  SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
11735  SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
11736 
11737  // create the vector idx64,idx64+1,idx64,idx64+1,...
// Idx128 + Idx128 doubles the quadword index to get the first 64-bit lane.
11738  SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
11739  SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
11740  SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
11741 
11742  // create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
11743  SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
11744  return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
11745 }
11746 
11747 
11748 static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
11749  APInt &UndefBits) {
11750  EVT VT = BVN->getValueType(0);
11751  APInt SplatBits, SplatUndef;
11752  unsigned SplatBitSize;
11753  bool HasAnyUndefs;
11754  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
11755  unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
11756 
11757  for (unsigned i = 0; i < NumSplats; ++i) {
11758  CnstBits <<= SplatBitSize;
11759  UndefBits <<= SplatBitSize;
11760  CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
11761  UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
11762  }
11763 
11764  return true;
11765  }
11766 
11767  return false;
11768 }
11769 
11770 // Try 64-bit splatted SIMD immediate.
11771 static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
11772  const APInt &Bits) {
11773  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
11774  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
11775  EVT VT = Op.getValueType();
11776  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
11777 
11780 
11781  SDLoc dl(Op);
11782  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
11783  DAG.getConstant(Value, dl, MVT::i32));
11784  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
11785  }
11786  }
11787 
11788  return SDValue();
11789 }
11790 
11791 // Try 32-bit splatted SIMD immediate.
11792 static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
11793  const APInt &Bits,
11794  const SDValue *LHS = nullptr) {
11795  EVT VT = Op.getValueType();
11796  if (VT.isFixedLengthVector() &&
11798  return SDValue();
11799 
11800  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
11801  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
11802  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
11803  bool isAdvSIMDModImm = false;
11804  uint64_t Shift;
11805 
11806  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
11808  Shift = 0;
11809  }
11810  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
11812  Shift = 8;
11813  }
11814  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
11816  Shift = 16;
11817  }
11818  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
11820  Shift = 24;
11821  }
11822 
11823  if (isAdvSIMDModImm) {
11824  SDLoc dl(Op);
11825  SDValue Mov;
11826 
11827  if (LHS)
11828  Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
11829  DAG.getConstant(Value, dl, MVT::i32),
11830  DAG.getConstant(Shift, dl, MVT::i32));
11831  else
11832  Mov = DAG.getNode(NewOp, dl, MovTy,
11833  DAG.getConstant(Value, dl, MVT::i32),
11834  DAG.getConstant(Shift, dl, MVT::i32));
11835 
11836  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
11837  }
11838  }
11839 
11840  return SDValue();
11841 }
11842 
11843 // Try 16-bit splatted SIMD immediate.
11844 static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
11845  const APInt &Bits,
11846  const SDValue *LHS = nullptr) {
11847  EVT VT = Op.getValueType();
11848  if (VT.isFixedLengthVector() &&
11850  return SDValue();
11851 
11852  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
11853  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
11854  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
11855  bool isAdvSIMDModImm = false;
11856  uint64_t Shift;
11857 
11858  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
11860  Shift = 0;
11861  }
11862  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
11864  Shift = 8;
11865  }
11866 
11867  if (isAdvSIMDModImm) {
11868  SDLoc dl(Op);
11869  SDValue Mov;
11870 
11871  if (LHS)
11872  Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
11873  DAG.getConstant(Value, dl, MVT::i32),
11874  DAG.getConstant(Shift, dl, MVT::i32));
11875  else
11876  Mov = DAG.getNode(NewOp, dl, MovTy,
11877  DAG.getConstant(Value, dl, MVT::i32),
11878  DAG.getConstant(Shift, dl, MVT::i32));
11879 
11880  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
11881  }
11882  }
11883 
11884  return SDValue();
11885 }
11886 
11887 // Try 32-bit splatted SIMD immediate with shifted ones.
11888 static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
11889  SelectionDAG &DAG, const APInt &Bits) {
11890  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
11891  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
11892  EVT VT = Op.getValueType();
11893  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
11894  bool isAdvSIMDModImm = false;
11895  uint64_t Shift;
11896 
11897  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
11899  Shift = 264;
11900  }
11901  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
11903  Shift = 272;
11904  }
11905 
11906  if (isAdvSIMDModImm) {
11907  SDLoc dl(Op);
11908  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
11909  DAG.getConstant(Value, dl, MVT::i32),
11910  DAG.getConstant(Shift, dl, MVT::i32));
11911  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
11912  }
11913  }
11914 
11915  return SDValue();
11916 }
11917 
11918 // Try 8-bit splatted SIMD immediate.
11919 static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
11920  const APInt &Bits) {
11921  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
11922  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
11923  EVT VT = Op.getValueType();
11924  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
11925 
11928 
11929  SDLoc dl(Op);
11930  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
11931  DAG.getConstant(Value, dl, MVT::i32));
11932  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
11933  }
11934  }
11935 
11936  return SDValue();
11937 }
11938 
11939 // Try FP splatted SIMD immediate.
11940 static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
11941  const APInt &Bits) {
11942  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
11943  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
11944  EVT VT = Op.getValueType();
11945  bool isWide = (VT.getSizeInBits() == 128);
11946  MVT MovTy;
11947  bool isAdvSIMDModImm = false;
11948 
11949  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
11951  MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
11952  }
11953  else if (isWide &&
11954  (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
11956  MovTy = MVT::v2f64;
11957  }
11958 
11959  if (isAdvSIMDModImm) {
11960  SDLoc dl(Op);
11961  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
11962  DAG.getConstant(Value, dl, MVT::i32));
11963  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
11964  }
11965  }
11966 
11967  return SDValue();
11968 }
11969 
11970 // Specialized code to quickly find if PotentialBVec is a BuildVector that
11971 // consists of only the same constant int value, returned in reference arg
11972 // ConstVal
11973 static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
11974  uint64_t &ConstVal) {
11975  BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
11976  if (!Bvec)
11977  return false;
11978  ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
11979  if (!FirstElt)
11980  return false;
11981  EVT VT = Bvec->getValueType(0);
11982  unsigned NumElts = VT.getVectorNumElements();
11983  for (unsigned i = 1; i < NumElts; ++i)
11984  if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
11985  return false;
11986  ConstVal = FirstElt->getZExtValue();
11987  return true;
11988 }
11989 
11990 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
11991 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
11992 // BUILD_VECTORs with constant element C1, C2 is a constant, and:
11993 // - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
11994 // - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
11995 // The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
// NOTE(review): original line 11996 - the function signature, presumably
// `static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {` given the
// uses of N and DAG below and the call site in LowerVectorOR - was dropped
// by the doxygen extraction.
11997  EVT VT = N->getValueType(0);
11998 
11999  if (!VT.isVector())
12000  return SDValue();
12001 
12002  SDLoc DL(N);
12003 
12004  SDValue And;
12005  SDValue Shift;
12006 
12007  SDValue FirstOp = N->getOperand(0);
12008  unsigned FirstOpc = FirstOp.getOpcode();
12009  SDValue SecondOp = N->getOperand(1);
12010  unsigned SecondOpc = SecondOp.getOpcode();
12011 
12012  // Is one of the operands an AND or a BICi? The AND may have been optimised to
12013  // a BICi in order to use an immediate instead of a register.
12014  // Is the other operand an shl or lshr? This will have been turned into:
12015  // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
12016  if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
12017  (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
12018  And = FirstOp;
12019  Shift = SecondOp;
12020 
12021  } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
12022  (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
12023  And = SecondOp;
12024  Shift = FirstOp;
12025  } else
12026  return SDValue();
12027 
12028  bool IsAnd = And.getOpcode() == ISD::AND;
12029  bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
12030 
12031  // Is the shift amount constant?
12032  ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
12033  if (!C2node)
12034  return SDValue();
12035 
12036  uint64_t C1;
12037  if (IsAnd) {
12038  // Is the and mask vector all constant?
12039  if (!isAllConstantBuildVector(And.getOperand(1), C1))
12040  return SDValue();
12041  } else {
12042  // Reconstruct the corresponding AND immediate from the two BICi immediates.
12043  ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
12044  ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
12045  assert(C1nodeImm && C1nodeShift);
12046  C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
12047  }
12048 
12049  // Is C1 == ~(Ones(ElemSizeInBits) << C2) or
12050  // C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
12051  // how much one can shift elements of a particular size?
12052  uint64_t C2 = C2node->getZExtValue();
12053  unsigned ElemSizeInBits = VT.getScalarSizeInBits();
12054  if (C2 > ElemSizeInBits)
12055  return SDValue();
12056 
// Express the required mask as an APInt so the comparison is exact at the
// element width: SRI keeps the high C2 bits of X, SLI keeps the low C2 bits.
12057  APInt C1AsAPInt(ElemSizeInBits, C1);
12058  APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
12059  : APInt::getLowBitsSet(ElemSizeInBits, C2);
12060  if (C1AsAPInt != RequiredC1)
12061  return SDValue();
12062 
12063  SDValue X = And.getOperand(0);
12064  SDValue Y = Shift.getOperand(0);
12065 
12066  unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
12067  SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
12068 
12069  LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
12070  LLVM_DEBUG(N->dump(&DAG));
12071  LLVM_DEBUG(dbgs() << "into: \n");
12072  LLVM_DEBUG(ResultSLI->dump(&DAG));
12073 
12074  ++NumShiftInserts;
12075  return ResultSLI;
12076 }
12077 
12078 SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
12079  SelectionDAG &DAG) const {
12080  if (useSVEForFixedLengthVectorVT(Op.getValueType(),
12081  Subtarget->forceStreamingCompatibleSVE()))
12082  return LowerToScalableOp(Op, DAG);
12083 
12084  // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
12085  if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
12086  return Res;
12087 
12088  EVT VT = Op.getValueType();
12089 
12090  SDValue LHS = Op.getOperand(0);
12091  BuildVectorSDNode *BVN =
12092  dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
12093  if (!BVN) {
12094  // OR commutes, so try swapping the operands.
12095  LHS = Op.getOperand(1);
12096  BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
12097  }
12098  if (!BVN)
12099  return Op;
12100 
12101  APInt DefBits(VT.getSizeInBits(), 0);
12102  APInt UndefBits(VT.getSizeInBits(), 0);
12103  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
12104  SDValue NewOp;
12105 
12106  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
12107  DefBits, &LHS)) ||
12108  (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
12109  DefBits, &LHS)))
12110  return NewOp;
12111 
12112  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
12113  UndefBits, &LHS)) ||
12114  (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
12115  UndefBits, &LHS)))
12116  return NewOp;
12117  }
12118 
12119  // We can always fall back to a non-immediate OR.
12120  return Op;
12121 }
12122 
12123 // Normalize the operands of BUILD_VECTOR. The value of constant operands will
12124 // be truncated to fit element width.
// Rebuilds the BUILD_VECTOR with each constant lane truncated to the element
// width (then re-zero-extended into an i32 node) and undef lanes normalised
// to undef-i32. Only applies to integer elements of 16 bits or fewer; wider
// or floating-point vectors are returned unchanged.
// NOTE(review): doxygen extraction dropped original lines 12125 (first line
// of the signature, presumably `static SDValue NormalizeBuildVector(SDValue Op,`)
// and 12135 (presumably the declaration of the `Ops` SmallVector pushed to
// at 12151) - confirm upstream before editing.
12126  SelectionDAG &DAG) {
12127  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
12128  SDLoc dl(Op);
12129  EVT VT = Op.getValueType();
12130  EVT EltTy= VT.getVectorElementType();
12131 
12132  if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
12133  return Op;
12134 
12136  for (SDValue Lane : Op->ops()) {
12137  // For integer vectors, type legalization would have promoted the
12138  // operands already. Otherwise, if Op is a floating-point splat
12139  // (with operands cast to integers), then the only possibilities
12140  // are constants and UNDEFs.
12141  if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
// Construct an APInt at element width to drop any bits beyond EltTy.
12142  APInt LowBits(EltTy.getSizeInBits(),
12143  CstLane->getZExtValue());
12144  Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
12145  } else if (Lane.getNode()->isUndef()) {
12146  Lane = DAG.getUNDEF(MVT::i32);
12147  } else {
12148  assert(Lane.getValueType() == MVT::i32 &&
12149  "Unexpected BUILD_VECTOR operand type");
12150  }
12151  Ops.push_back(Lane);
12152  }
12153  return DAG.getBuildVector(VT, dl, Ops);
12154 }
12155 
// Try to materialise a constant BUILD_VECTOR with a single MOVI/MVNI/FMOV
// modified-immediate instruction. Tries, in order: the value as-is (MOVI
// variants and FMOV), its complement (MVNI variants), and then the same two
// passes with undef bits folded into the immediate. Returns an empty
// SDValue when no modified-immediate encoding matches.
// NOTE(review): original line 12156 - the signature, presumably
// `static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {`
// given the call sites in LowerBUILD_VECTOR - was dropped by the doxygen
// extraction.
12157  EVT VT = Op.getValueType();
12158 
12159  APInt DefBits(VT.getSizeInBits(), 0);
12160  APInt UndefBits(VT.getSizeInBits(), 0);
12161  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
12162  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
12163  SDValue NewOp;
// Pass 1: the defined bits, widest immediate form first.
12164  if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
12165  (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12166  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
12167  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12168  (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
12169  (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
12170  return NewOp;
12171 
// Pass 2: the complemented bits, matched with the inverting MVNI forms.
12172  DefBits = ~DefBits;
12173  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
12174  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
12175  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
12176  return NewOp;
12177 
// Passes 3 and 4: repeat both attempts treating undef lanes as free bits
// the immediate may take any value for.
12178  DefBits = UndefBits;
12179  if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
12180  (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12181  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
12182  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
12183  (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
12184  (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
12185  return NewOp;
12186 
12187  DefBits = ~UndefBits;
12188  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
12189  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
12190  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
12191  return NewOp;
12192  }
12193 
12194  return SDValue();
12195 }
12196 
12197 SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
12198  SelectionDAG &DAG) const {
12199  EVT VT = Op.getValueType();
12200 
12202  Subtarget->forceStreamingCompatibleSVE())) {
12203  if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
12204  SDLoc DL(Op);
12205  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
12206  SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
12207  SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
12208  SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps);
12209  return convertFromScalableVector(DAG, Op.getValueType(), Seq);
12210  }
12211 
12212  // Revert to common legalisation for all other variants.
12213  return SDValue();
12214  }
12215 
12216  // Try to build a simple constant vector.
12217  Op = NormalizeBuildVector(Op, DAG);
12218  // Thought this might return a non-BUILD_VECTOR (e.g. CONCAT_VECTORS), if so,
12219  // abort.
12220  if (Op.getOpcode() != ISD::BUILD_VECTOR)
12221  return SDValue();
12222 
12223  if (VT.isInteger()) {
12224  // Certain vector constants, used to express things like logical NOT and
12225  // arithmetic NEG, are passed through unmodified. This allows special
12226  // patterns for these operations to match, which will lower these constants
12227  // to whatever is proven necessary.
12228  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
12229  if (BVN->isConstant())
12230  if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
12231  unsigned BitSize = VT.getVectorElementType().getSizeInBits();
12232  APInt Val(BitSize,
12233  Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
12234  if (Val.isZero() || Val.isAllOnes())
12235  return Op;
12236  }
12237  }
12238 
12239  if (SDValue V = ConstantBuildVector(Op, DAG))
12240  return V;
12241 
12242  // Scan through the operands to find some interesting properties we can
12243  // exploit:
12244  // 1) If only one value is used, we can use a DUP, or
12245  // 2) if only the low element is not undef, we can just insert that, or
12246  // 3) if only one constant value is used (w/ some non-constant lanes),
12247  // we can splat the constant value into the whole vector then fill
12248  // in the non-constant lanes.
12249  // 4) FIXME: If different constant values are used, but we can intelligently
12250  // select the values we'll be overwriting for the non-constant
12251  // lanes such that we can directly materialize the vector
12252  // some other way (MOVI, e.g.), we can be sneaky.
12253  // 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
12254  SDLoc dl(Op);
12255  unsigned NumElts = VT.getVectorNumElements();
12256  bool isOnlyLowElement = true;
12257  bool usesOnlyOneValue = true;
12258  bool usesOnlyOneConstantValue = true;
12259  bool isConstant = true;
12260  bool AllLanesExtractElt = true;
12261  unsigned NumConstantLanes = 0;
12262  unsigned NumDifferentLanes = 0;
12263  unsigned NumUndefLanes = 0;
12264  SDValue Value;
12265  SDValue ConstantValue;
12266  for (unsigned i = 0; i < NumElts; ++i) {
12267  SDValue V = Op.getOperand(i);
12269  AllLanesExtractElt = false;
12270  if (V.isUndef()) {
12271  ++NumUndefLanes;
12272  continue;
12273  }
12274  if (i > 0)
12275  isOnlyLowElement = false;
12276  if (!isIntOrFPConstant(V))
12277  isConstant = false;
12278 
12279  if (isIntOrFPConstant(V)) {
12280  ++NumConstantLanes;
12281  if (!ConstantValue.getNode())
12282  ConstantValue = V;
12283  else if (ConstantValue != V)
12284  usesOnlyOneConstantValue = false;
12285  }
12286 
12287  if (!Value.getNode())
12288  Value = V;
12289  else if (V != Value) {
12290  usesOnlyOneValue = false;
12291  ++NumDifferentLanes;
12292  }
12293  }
12294 
12295  if (!Value.getNode()) {
12296  LLVM_DEBUG(
12297  dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
12298  return DAG.getUNDEF(VT);
12299  }
12300 
12301  // Convert BUILD_VECTOR where all elements but the lowest are undef into
12302  // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
12303  // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
12304  if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
12305  LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
12306  "SCALAR_TO_VECTOR node\n");
12307  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
12308  }
12309 
12310  if (AllLanesExtractElt) {
12311  SDNode *Vector = nullptr;
12312  bool Even = false;
12313  bool Odd = false;
12314  // Check whether the extract elements match the Even pattern <0,2,4,...> or
12315  // the Odd pattern <1,3,5,...>.
12316  for (unsigned i = 0; i < NumElts; ++i) {
12317  SDValue V = Op.getOperand(i);
12318  const SDNode *N = V.getNode();
12319  if (!isa<ConstantSDNode>(N->getOperand(1)))
12320  break;
12321  SDValue N0 = N->getOperand(0);
12322 
12323  // All elements are extracted from the same vector.
12324  if (!Vector) {
12325  Vector = N0.getNode();
12326  // Check that the type of EXTRACT_VECTOR_ELT matches the type of
12327  // BUILD_VECTOR.
12328  if (VT.getVectorElementType() !=
12330  break;
12331  } else if (Vector != N0.getNode()) {
12332  Odd = false;
12333  Even = false;
12334  break;
12335  }
12336 
12337  // Extracted values are either at Even indices <0,2,4,...> or at Odd
12338  // indices <1,3,5,...>.
12339  uint64_t Val = N->getConstantOperandVal(1);
12340  if (Val == 2 * i) {
12341  Even = true;
12342  continue;
12343  }
12344  if (Val - 1 == 2 * i) {
12345  Odd = true;
12346  continue;
12347  }
12348 
12349  // Something does not match: abort.
12350  Odd = false;
12351  Even = false;
12352  break;
12353  }
12354  if (Even || Odd) {
12355  SDValue LHS =
12356  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
12357  DAG.getConstant(0, dl, MVT::i64));
12358  SDValue RHS =
12359  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
12360  DAG.getConstant(NumElts, dl, MVT::i64));
12361 
12362  if (Even && !Odd)
12363  return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
12364  RHS);
12365  if (Odd && !Even)
12366  return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
12367  RHS);
12368  }
12369  }
12370 
12371  // Use DUP for non-constant splats. For f32 constant splats, reduce to
12372  // i32 and try again.
12373  if (usesOnlyOneValue) {
12374  if (!isConstant) {
12375  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12376  Value.getValueType() != VT) {
12377  LLVM_DEBUG(
12378  dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
12379  return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
12380  }
12381 
12382  // This is actually a DUPLANExx operation, which keeps everything vectory.
12383 
12384  SDValue Lane = Value.getOperand(1);
12385  Value = Value.getOperand(0);
12386  if (Value.getValueSizeInBits() == 64) {
12387  LLVM_DEBUG(
12388  dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
12389  "widening it\n");
12390  Value = WidenVector(Value, DAG);
12391  }
12392 
12393  unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
12394  return DAG.getNode(Opcode, dl, VT, Value, Lane);
12395  }
12396 
12399  EVT EltTy = VT.getVectorElementType();
12400  assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
12401  EltTy == MVT::f64) && "Unsupported floating-point vector type");
12402  LLVM_DEBUG(
12403  dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
12404  "BITCASTS, and try again\n");
12405  MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
12406  for (unsigned i = 0; i < NumElts; ++i)
12407  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
12408  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
12409  SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
12410  LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
12411  Val.dump(););
12412  Val = LowerBUILD_VECTOR(Val, DAG);
12413  if (Val.getNode())
12414  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
12415  }
12416  }
12417 
12418  // If we need to insert a small number of different non-constant elements and
12419  // the vector width is sufficiently large, prefer using DUP with the common
12420  // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
12421  // skip the constant lane handling below.
12422  bool PreferDUPAndInsert =
12423  !isConstant && NumDifferentLanes >= 1 &&
12424  NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
12425  NumDifferentLanes >= NumConstantLanes;
12426 
12427  // If there was only one constant value used and for more than one lane,
12428  // start by splatting that value, then replace the non-constant lanes. This
12429  // is better than the default, which will perform a separate initialization
12430  // for each lane.
12431  if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
12432  // Firstly, try to materialize the splat constant.
12433  SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
12434  Val = ConstantBuildVector(Vec, DAG);
12435  if (!Val) {
12436  // Otherwise, materialize the constant and splat it.
12437  Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
12438  DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
12439  }
12440 
12441  // Now insert the non-constant lanes.
12442  for (unsigned i = 0; i < NumElts; ++i) {
12443  SDValue V = Op.getOperand(i);
12444  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
12445  if (!isIntOrFPConstant(V))
12446  // Note that type legalization likely mucked about with the VT of the
12447  // source operand, so we may have to convert it here before inserting.
12448  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
12449  }
12450  return Val;
12451  }
12452 
12453  // This will generate a load from the constant pool.
12454  if (isConstant) {
12455  LLVM_DEBUG(
12456  dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
12457  "expansion\n");
12458  return SDValue();
12459  }
12460 
12461  // Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
12462  // v4i32s. This is really a truncate, which we can construct out of (legal)
12463  // concats and truncate nodes.
12465  return M;
12466 
12467  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
12468  if (NumElts >= 4) {
12469  if (SDValue shuffle = ReconstructShuffle(Op, DAG))
12470  return shuffle;
12471  }
12472 
12473  if (PreferDUPAndInsert) {
12474  // First, build a constant vector with the common element.
12475  SmallVector<SDValue, 8> Ops(NumElts, Value);
12476  SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
12477  // Next, insert the elements that do not match the common value.
12478  for (unsigned I = 0; I < NumElts; ++I)
12479  if (Op.getOperand(I) != Value)
12480  NewVector =
12481  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
12482  Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
12483 
12484  return NewVector;
12485  }
12486 
12487  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
12488  // know the default expansion would otherwise fall back on something even
12489  // worse. For a vector with one or two non-undef values, that's
12490  // scalar_to_vector for the elements followed by a shuffle (provided the
12491  // shuffle is valid for the target) and materialization element by element
12492  // on the stack followed by a load for everything else.
12493  if (!isConstant && !usesOnlyOneValue) {
12494  LLVM_DEBUG(
12495  dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
12496  "of INSERT_VECTOR_ELT\n");
12497 
12498  SDValue Vec = DAG.getUNDEF(VT);
12499  SDValue Op0 = Op.getOperand(0);
12500  unsigned i = 0;
12501 
12502  // Use SCALAR_TO_VECTOR for lane zero to
12503  // a) Avoid a RMW dependency on the full vector register, and
12504  // b) Allow the register coalescer to fold away the copy if the
12505  // value is already in an S or D register, and we're forced to emit an
12506  // INSERT_SUBREG that we can't fold anywhere.
12507  //
12508  // We also allow types like i8 and i16 which are illegal scalar but legal
12509  // vector element types. After type-legalization the inserted value is
12510  // extended (i32) and it is safe to cast them to the vector type by ignoring
12511  // the upper bits of the lowest lane (e.g. v8i8, v4i16).
12512  if (!Op0.isUndef()) {
12513  LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
12514  Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
12515  ++i;
12516  }
12517  LLVM_DEBUG(if (i < NumElts) dbgs()
12518  << "Creating nodes for the other vector elements:\n";);
12519  for (; i < NumElts; ++i) {
12520  SDValue V = Op.getOperand(i);
12521  if (V.isUndef())
12522  continue;
12523  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
12524  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
12525  }
12526  return Vec;
12527  }
12528 
12529  LLVM_DEBUG(
12530  dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
12531  "better alternative\n");
12532  return SDValue();
12533 }
12534 
12535 SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
12536  SelectionDAG &DAG) const {
12537  if (useSVEForFixedLengthVectorVT(Op.getValueType(),
12538  Subtarget->forceStreamingCompatibleSVE()))
12539  return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
12540 
12541  assert(Op.getValueType().isScalableVector() &&
12542  isTypeLegal(Op.getValueType()) &&
12543  "Expected legal scalable vector type!");
12544 
12545  if (isTypeLegal(Op.getOperand(0).getValueType())) {
12546  unsigned NumOperands = Op->getNumOperands();
12547  assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
12548  "Unexpected number of operands in CONCAT_VECTORS");
12549 
12550  if (NumOperands == 2)
12551  return Op;
12552 
12553  // Concat each pair of subvectors and pack into the lower half of the array.
12554  SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
12555  while (ConcatOps.size() > 1) {
12556  for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
12557  SDValue V1 = ConcatOps[I];
12558  SDValue V2 = ConcatOps[I + 1];
12559  EVT SubVT = V1.getValueType();
12560  EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
12561  ConcatOps[I / 2] =
12562  DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
12563  }
12564  ConcatOps.resize(ConcatOps.size() / 2);
12565  }
12566  return ConcatOps[0];
12567  }
12568 
12569  return SDValue();
12570 }
12571 
12572 SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
12573  SelectionDAG &DAG) const {
12574  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
12575 
12576  if (useSVEForFixedLengthVectorVT(Op.getValueType(),
12577  Subtarget->forceStreamingCompatibleSVE()))
12578  return LowerFixedLengthInsertVectorElt(Op, DAG);
12579 
12580  // Check for non-constant or out of range lane.
12581  EVT VT = Op.getOperand(0).getValueType();
12582 
12583  if (VT.getScalarType() == MVT::i1) {
12584  EVT VectorVT = getPromotedVTForPredicate(VT);
12585  SDLoc DL(Op);
12586  SDValue ExtendedVector =
12587  DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
12588  SDValue ExtendedValue =
12589  DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
12590  VectorVT.getScalarType().getSizeInBits() < 32
12591  ? MVT::i32
12592  : VectorVT.getScalarType());
12593  ExtendedVector =
12594  DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
12595  ExtendedValue, Op.getOperand(2));
12596  return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
12597  }
12598 
12599  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
12600  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
12601  return SDValue();
12602 
12603  // Insertion/extraction are legal for V128 types.
12604  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12605  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
12606  VT == MVT::v8f16 || VT == MVT::v8bf16)
12607  return Op;
12608 
12609  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
12610  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
12611  VT != MVT::v4bf16)
12612  return SDValue();
12613 
12614  // For V64 types, we perform insertion by expanding the value
12615  // to a V128 type and perform the insertion on that.
12616  SDLoc DL(Op);
12617  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
12618  EVT WideTy = WideVec.getValueType();
12619 
12620  SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
12621  Op.getOperand(1), Op.getOperand(2));
12622  // Re-narrow the resultant vector.
12623  return NarrowVector(Node, DAG);
12624 }
12625 
12626 SDValue
12627 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
12628  SelectionDAG &DAG) const {
12629  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
12630  EVT VT = Op.getOperand(0).getValueType();
12631 
12632  if (VT.getScalarType() == MVT::i1) {
12633  // We can't directly extract from an SVE predicate; extend it first.
12634  // (This isn't the only possible lowering, but it's straightforward.)
12635  EVT VectorVT = getPromotedVTForPredicate(VT);
12636  SDLoc DL(Op);
12637  SDValue Extend =
12638  DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
12639  MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
12640  SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
12641  Extend, Op.getOperand(1));
12642  return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
12643  }
12644 
12646  Subtarget->forceStreamingCompatibleSVE()))
12647  return LowerFixedLengthExtractVectorElt(Op, DAG);
12648 
12649  // Check for non-constant or out of range lane.
12650  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
12651  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
12652  return SDValue();
12653 
12654  // Insertion/extraction are legal for V128 types.
12655  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12656  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
12657  VT == MVT::v8f16 || VT == MVT::v8bf16)
12658  return Op;
12659 
12660  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
12661  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
12662  VT != MVT::v4bf16)
12663  return SDValue();
12664 
12665  // For V64 types, we perform extraction by expanding the value
12666  // to a V128 type and perform the extraction on that.
12667  SDLoc DL(Op);
12668  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
12669  EVT WideTy = WideVec.getValueType();
12670 
12671  EVT ExtrTy = WideTy.getVectorElementType();
12672  if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
12673  ExtrTy = MVT::i32;
12674 
12675  // For extractions, we just return the result directly.
12676  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
12677  Op.getOperand(1));
12678 }
12679 
12680 SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
12681  SelectionDAG &DAG) const {
12682  assert(Op.getValueType().isFixedLengthVector() &&
12683  "Only cases that extract a fixed length vector are supported!");
12684 
12685  EVT InVT = Op.getOperand(0).getValueType();
12686  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
12687  unsigned Size = Op.getValueSizeInBits();
12688 
12689  // If we don't have legal types yet, do nothing
12690  if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
12691  return SDValue();
12692 
12693  if (InVT.isScalableVector()) {
12694  // This will be matched by custom code during ISelDAGToDAG.
12695  if (Idx == 0 && isPackedVectorType(InVT, DAG))
12696  return Op;
12697 
12698  return SDValue();
12699  }
12700 
12701  // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
12702  if (Idx == 0 && InVT.getSizeInBits() <= 128)
12703  return Op;
12704 
12705  // If this is extracting the upper 64-bits of a 128-bit vector, we match
12706  // that directly.
12707  if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
12708  InVT.getSizeInBits() == 128 && !Subtarget->forceStreamingCompatibleSVE())
12709  return Op;
12710 
12712  Subtarget->forceStreamingCompatibleSVE())) {
12713  SDLoc DL(Op);
12714 
12715  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
12716  SDValue NewInVec =
12717  convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
12718 
12719  SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, NewInVec,
12720  NewInVec, DAG.getConstant(Idx, DL, MVT::i64));
12721  return convertFromScalableVector(DAG, Op.getValueType(), Splice);
12722  }
12723 
12724  return SDValue();
12725 }
12726 
12727 SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
12728  SelectionDAG &DAG) const {
12729  assert(Op.getValueType().isScalableVector() &&
12730  "Only expect to lower inserts into scalable vectors!");
12731 
12732  EVT InVT = Op.getOperand(1).getValueType();
12733  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
12734 
12735  SDValue Vec0 = Op.getOperand(0);
12736  SDValue Vec1 = Op.getOperand(1);
12737  SDLoc DL(Op);
12738  EVT VT = Op.getValueType();
12739 
12740  if (InVT.isScalableVector()) {
12741  if (!isTypeLegal(VT))
12742  return SDValue();
12743 
12744  // Break down insert_subvector into simpler parts.
12745  if (VT.getVectorElementType() == MVT::i1) {
12746  unsigned NumElts = VT.getVectorMinNumElements();
12747  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
12748 
12749  SDValue Lo, Hi;
12750  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
12751  DAG.getVectorIdxConstant(0, DL));
12752  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
12753  DAG.getVectorIdxConstant(NumElts / 2, DL));
12754  if (Idx < (NumElts / 2)) {
12755  SDValue NewLo = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Lo, Vec1,
12756  DAG.getVectorIdxConstant(Idx, DL));
12757  return DAG.getNode(AArch64ISD::UZP1, DL, VT, NewLo, Hi);
12758  } else {
12759  SDValue NewHi =
12760  DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Hi, Vec1,
12761  DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
12762  return DAG.getNode(AArch64ISD::UZP1, DL, VT, Lo, NewHi);
12763  }
12764  }
12765 
12766  // Ensure the subvector is half the size of the main vector.
12767  if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
12768  return SDValue();
12769 
12770  // Here narrow and wide refers to the vector element types. After "casting"
12771  // both vectors must have the same bit length and so because the subvector
12772  // has fewer elements, those elements need to be bigger.
12775 
12776  // NOP cast operands to the largest legal vector of the same element count.
12777  if (VT.isFloatingPoint()) {
12778  Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
12779  Vec1 = getSVESafeBitCast(WideVT, Vec1, DAG);
12780  } else {
12781  // Legal integer vectors are already their largest so Vec0 is fine as is.
12782  Vec1 = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
12783  }
12784 
12785  // To replace the top/bottom half of vector V with vector SubV we widen the
12786  // preserved half of V, concatenate this to SubV (the order depending on the
12787  // half being replaced) and then narrow the result.
12788  SDValue Narrow;
12789  if (Idx == 0) {
12790  SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
12791  Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, Vec1, HiVec0);
12792  } else {
12793  assert(Idx == InVT.getVectorMinNumElements() &&
12794  "Invalid subvector index!");
12795  SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
12796  Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, LoVec0, Vec1);
12797  }
12798 
12799  return getSVESafeBitCast(VT, Narrow, DAG);
12800  }
12801 
12802  if (Idx == 0 && isPackedVectorType(VT, DAG)) {
12803  // This will be matched by custom code during ISelDAGToDAG.
12804  if (Vec0.isUndef())
12805  return Op;
12806 
12807  std::optional<unsigned> PredPattern =
12809  auto PredTy = VT.changeVectorElementType(MVT::i1);
12810  SDValue PTrue = getPTrue(DAG, DL, PredTy, *PredPattern);
12811  SDValue ScalableVec1 = convertToScalableVector(DAG, VT, Vec1);
12812  return DAG.getNode(ISD::VSELECT, DL, VT, PTrue, ScalableVec1, Vec0);
12813  }
12814 
12815  return SDValue();
12816 }
12817 
12818 static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
12819  if (Op.getOpcode() != AArch64ISD::DUP &&
12820  Op.getOpcode() != ISD::SPLAT_VECTOR &&
12821  Op.getOpcode() != ISD::BUILD_VECTOR)
12822  return false;
12823 
12824  if (Op.getOpcode() == ISD::BUILD_VECTOR &&
12825  !isAllConstantBuildVector(Op, SplatVal))
12826  return false;
12827 
12828  if (Op.getOpcode() != ISD::BUILD_VECTOR &&
12829  !isa<ConstantSDNode>(Op->getOperand(0)))
12830  return false;
12831 
12832  SplatVal = Op->getConstantOperandVal(0);
12833  if (Op.getValueType().getVectorElementType() != MVT::i64)
12834  SplatVal = (int32_t)SplatVal;
12835 
12836  Negated = false;
12837  if (isPowerOf2_64(SplatVal))
12838  return true;
12839 
12840  Negated = true;
12841  if (isPowerOf2_64(-SplatVal)) {
12842  SplatVal = -SplatVal;
12843  return true;
12844  }
12845 
12846  return false;
12847 }
12848 
12849 SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
12850  EVT VT = Op.getValueType();
12851  SDLoc dl(Op);
12852 
12853  if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
12854  return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
12855 
12856  assert(VT.isScalableVector() && "Expected a scalable vector.");
12857 
12858  bool Signed = Op.getOpcode() == ISD::SDIV;
12859  unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
12860 
12861  bool Negated;
12862  uint64_t SplatVal;
12863  if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
12864  SDValue Pg = getPredicateForScalableVector(DAG, dl, VT);
12865  SDValue Res =
12866  DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
12867  DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32));
12868  if (Negated)
12869  Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
12870 
12871  return Res;
12872  }
12873 
12874  if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
12875  return LowerToPredicatedOp(Op, DAG, PredOpcode);
12876 
12877  // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
12878  // operations, and truncate the result.
12879  EVT WidenedVT;
12880  if (VT == MVT::nxv16i8)
12881  WidenedVT = MVT::nxv8i16;
12882  else if (VT == MVT::nxv8i16)
12883  WidenedVT = MVT::nxv4i32;
12884  else
12885  llvm_unreachable("Unexpected Custom DIV operation");
12886 
12887  unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
12888  unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
12889  SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
12890  SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
12891  SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
12892  SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
12893  SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
12894  SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
12895  return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
12896 }
12897 
12899  // Currently no fixed length shuffles that require SVE are legal.
12901  Subtarget->forceStreamingCompatibleSVE()))
12902  return false;
12903 
12904  if (VT.getVectorNumElements() == 4 &&
12905  (VT.is128BitVector() || VT.is64BitVector())) {
12906  unsigned Cost = getPerfectShuffleCost(M);
12907  if (Cost <= 1)
12908  return true;
12909  }
12910 
12911  bool DummyBool;
12912  int DummyInt;
12913  unsigned DummyUnsigned;
12914 
12915  return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
12916  isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
12917  isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
12918  // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
12919  isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
12920  isZIPMask(M, VT, DummyUnsigned) ||
12921  isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
12922  isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
12923  isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
12924  isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
12925  isConcatMask(M, VT, VT.getSizeInBits() == 128));
12926 }
12927 
12929  EVT VT) const {
12930  // Just delegate to the generic legality, clear masks aren't special.
12931  return isShuffleMaskLegal(M, VT);
12932 }
12933 
12934 /// getVShiftImm - Check if this is a valid build_vector for the immediate
12935 /// operand of a vector shift operation, where all the elements of the
12936 /// build_vector must have the same constant integer value.
12937 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
12938  // Ignore bit_converts.
12939  while (Op.getOpcode() == ISD::BITCAST)
12940  Op = Op.getOperand(0);
12941  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
12942  APInt SplatBits, SplatUndef;
12943  unsigned SplatBitSize;
12944  bool HasAnyUndefs;
12945  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
12946  HasAnyUndefs, ElementBits) ||
12947  SplatBitSize > ElementBits)
12948  return false;
12949  Cnt = SplatBits.getSExtValue();
12950  return true;
12951 }
12952 
12953 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
12954 /// operand of a vector shift left operation. That value must be in the range:
12955 /// 0 <= Value < ElementBits for a left shift; or
12956 /// 0 <= Value <= ElementBits for a long left shift.
12957 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
12958  assert(VT.isVector() && "vector shift count is not a vector type");
12959  int64_t ElementBits = VT.getScalarSizeInBits();
12960  if (!getVShiftImm(Op, ElementBits, Cnt))
12961  return false;
12962  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
12963 }
12964 
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
///   1 <= Value <= ElementBits for a right shift; or
///   1 <= Value <= ElementBits / 2 for a narrowing right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  int64_t ElementBits = VT.getScalarSizeInBits();
  if (!getVShiftImm(Op, ElementBits, Cnt))
    return false;
  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
12975 
12976 SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
12977  SelectionDAG &DAG) const {
12978  EVT VT = Op.getValueType();
12979 
12980  if (VT.getScalarType() == MVT::i1) {
12981  // Lower i1 truncate to `(x & 1) != 0`.
12982  SDLoc dl(Op);
12983  EVT OpVT = Op.getOperand(0).getValueType();
12984  SDValue Zero = DAG.getConstant(0, dl, OpVT);
12985  SDValue One = DAG.getConstant(1, dl, OpVT);
12986  SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
12987  return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
12988  }
12989 
12990  if (!VT.isVector() || VT.isScalableVector())
12991  return SDValue();
12992 
12993  if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType(),
12994  Subtarget->forceStreamingCompatibleSVE()))
12995  return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
12996 
12997  return SDValue();
12998 }
12999 
13000 SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
13001  SelectionDAG &DAG) const {
13002  EVT VT = Op.getValueType();
13003  SDLoc DL(Op);
13004  int64_t Cnt;
13005 
13006  if (!Op.getOperand(1).getValueType().isVector())
13007  return Op;
13008  unsigned EltSize = VT.getScalarSizeInBits();
13009 
13010  switch (Op.getOpcode()) {
13011  case ISD::SHL:
13012  if (VT.isScalableVector() ||
13014  Subtarget->forceStreamingCompatibleSVE()))
13015  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
13016 
13017  if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
13018  return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
13019  DAG.getConstant(Cnt, DL, MVT::i32));
13020  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13021  DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
13022  MVT::i32),
13023  Op.getOperand(0), Op.getOperand(1));
13024  case ISD::SRA:
13025  case ISD::SRL:
13026  if (VT.isScalableVector() ||
13028  VT, Subtarget->forceStreamingCompatibleSVE())) {
13029  unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
13031  return LowerToPredicatedOp(Op, DAG, Opc);
13032  }
13033 
13034  // Right shift immediate
13035  if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
13036  unsigned Opc =
13037  (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
13038  return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
13039  DAG.getConstant(Cnt, DL, MVT::i32));
13040  }
13041 
13042  // Right shift register. Note, there is not a shift right register
13043  // instruction, but the shift left register instruction takes a signed
13044  // value, where negative numbers specify a right shift.
13045  unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
13046  : Intrinsic::aarch64_neon_ushl;
13047  // negate the shift amount
13048  SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13049  Op.getOperand(1));
13050  SDValue NegShiftLeft =
13052  DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
13053  NegShift);
13054  return NegShiftLeft;
13055  }
13056 
13057  llvm_unreachable("unexpected shift opcode");
13058 }
13059 
13061  AArch64CC::CondCode CC, bool NoNans, EVT VT,
13062  const SDLoc &dl, SelectionDAG &DAG) {
13063  EVT SrcVT = LHS.getValueType();
13064  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
13065  "function only supposed to emit natural comparisons");
13066 
13067  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
13068  APInt CnstBits(VT.getSizeInBits(), 0);
13069  APInt UndefBits(VT.getSizeInBits(), 0);
13070  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
13071  bool IsZero = IsCnst && (CnstBits == 0);
13072 
13073  if (SrcVT.getVectorElementType().isFloatingPoint()) {
13074  switch (CC) {
13075  default:
13076  return SDValue();
13077  case AArch64CC::NE: {
13078  SDValue Fcmeq;
13079  if (IsZero)
13080  Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
13081  else
13082  Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
13083  return DAG.getNOT(dl, Fcmeq, VT);
13084  }
13085  case AArch64CC::EQ:
13086  if (IsZero)
13087  return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
13088  return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
13089  case AArch64CC::GE:
13090  if (IsZero)
13091  return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
13092  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
13093  case AArch64CC::GT:
13094  if (IsZero)
13095  return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
13096  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
13097  case AArch64CC::LE:
13098  if (!NoNans)
13099  return SDValue();
13100  // If we ignore NaNs then we can use to the LS implementation.
13101  [[fallthrough]];
13102  case AArch64CC::LS:
13103  if (IsZero)
13104  return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
13105  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
13106  case AArch64CC::LT:
13107  if (!NoNans)
13108  return SDValue();
13109  // If we ignore NaNs then we can use to the MI implementation.
13110  [[fallthrough]];
13111  case AArch64CC::MI:
13112  if (IsZero)
13113  return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
13114  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
13115  }
13116  }
13117 
13118  switch (CC) {
13119  default:
13120  return SDValue();
13121  case AArch64CC::NE: {
13122  SDValue Cmeq;
13123  if (IsZero)
13124  Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
13125  else
13126  Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
13127  return DAG.getNOT(dl, Cmeq, VT);
13128  }
13129  case AArch64CC::EQ:
13130  if (IsZero)
13131  return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
13132  return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
13133  case AArch64CC::GE:
13134  if (IsZero)
13135  return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
13136  return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
13137  case AArch64CC::GT:
13138  if (IsZero)
13139  return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
13140  return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
13141  case AArch64CC::LE:
13142  if (IsZero)
13143  return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
13144  return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
13145  case AArch64CC::LS:
13146  return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
13147  case AArch64CC::LO:
13148  return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
13149  case AArch64CC::LT:
13150  if (IsZero)
13151  return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
13152  return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
13153  case AArch64CC::HI:
13154  return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
13155  case AArch64CC::HS:
13156  return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
13157  }
13158 }
13159 
13160 SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
13161  SelectionDAG &DAG) const {
13162  if (Op.getValueType().isScalableVector())
13163  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
13164 
13165  if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType(),
13166  Subtarget->forceStreamingCompatibleSVE()))
13167  return LowerFixedLengthVectorSetccToSVE(Op, DAG);
13168 
13169  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13170  SDValue LHS = Op.getOperand(0);
13171  SDValue RHS = Op.getOperand(1);
13172  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
13173  SDLoc dl(Op);
13174 
13175  if (LHS.getValueType().getVectorElementType().isInteger()) {
13176  assert(LHS.getValueType() == RHS.getValueType());
13178  SDValue Cmp =
13179  EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
13180  return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
13181  }
13182 
13183  const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
13184 
13185  // Make v4f16 (only) fcmp operations utilise vector instructions
13186  // v8f16 support will be a litle more complicated
13187  if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
13188  if (LHS.getValueType().getVectorNumElements() == 4) {
13189  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
13190  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
13191  SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
13192  DAG.ReplaceAllUsesWith(Op, NewSetcc);
13193  CmpVT = MVT::v4i32;
13194  } else
13195  return SDValue();
13196  }
13197 
13198  assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
13199  LHS.getValueType().getVectorElementType() != MVT::f128);
13200 
13201  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
13202  // clean. Some of them require two branches to implement.
13203  AArch64CC::CondCode CC1, CC2;
13204  bool ShouldInvert;
13205  changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
13206 
13207  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
13208  SDValue Cmp =
13209  EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
13210  if (!Cmp.getNode())
13211  return SDValue();
13212 
13213  if (CC2 != AArch64CC::AL) {
13214  SDValue Cmp2 =
13215  EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
13216  if (!Cmp2.getNode())
13217  return SDValue();
13218 
13219  Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
13220  }
13221 
13222  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
13223 
13224  if (ShouldInvert)
13225  Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
13226 
13227  return Cmp;
13228 }
13229 
13230 static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
13231  SelectionDAG &DAG) {
13232  SDValue VecOp = ScalarOp.getOperand(0);
13233  auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
13234  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
13235  DAG.getConstant(0, DL, MVT::i64));
13236 }
13237 
13238 SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
13239  SelectionDAG &DAG) const {
13240  SDValue Src = Op.getOperand(0);
13241 
13242  // Try to lower fixed length reductions to SVE.
13243  EVT SrcVT = Src.getValueType();
13244  bool OverrideNEON = Subtarget->forceStreamingCompatibleSVE() ||
13245  Op.getOpcode() == ISD::VECREDUCE_AND ||
13246  Op.getOpcode() == ISD::VECREDUCE_OR ||
13247  Op.getOpcode() == ISD::VECREDUCE_XOR ||
13248  Op.getOpcode() == ISD::VECREDUCE_FADD ||
13249  (Op.getOpcode() != ISD::VECREDUCE_ADD &&
13250  SrcVT.getVectorElementType() == MVT::i64);
13251  if (SrcVT.isScalableVector() ||
13253  SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
13254 
13255  if (SrcVT.getVectorElementType() == MVT::i1)
13256  return LowerPredReductionToSVE(Op, DAG);
13257 
13258  switch (Op.getOpcode()) {
13259  case ISD::VECREDUCE_ADD:
13260  return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
13261  case ISD::VECREDUCE_AND:
13262  return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
13263  case ISD::VECREDUCE_OR:
13264  return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
13265  case ISD::VECREDUCE_SMAX:
13266  return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
13267  case ISD::VECREDUCE_SMIN:
13268  return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
13269  case ISD::VECREDUCE_UMAX:
13270  return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
13271  case ISD::VECREDUCE_UMIN:
13272  return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
13273  case ISD::VECREDUCE_XOR:
13274  return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
13275  case ISD::VECREDUCE_FADD:
13276  return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
13277  case ISD::VECREDUCE_FMAX:
13278  return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
13279  case ISD::VECREDUCE_FMIN:
13280  return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
13281  default:
13282  llvm_unreachable("Unhandled fixed length reduction");
13283  }
13284  }
13285 
13286  // Lower NEON reductions.
13287  SDLoc dl(Op);
13288  switch (Op.getOpcode()) {
13289  case ISD::VECREDUCE_ADD:
13290  return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
13291  case ISD::VECREDUCE_SMAX:
13292  return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
13293  case ISD::VECREDUCE_SMIN:
13294  return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
13295  case ISD::VECREDUCE_UMAX:
13296  return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
13297  case ISD::VECREDUCE_UMIN:
13298  return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
13299  case ISD::VECREDUCE_FMAX: {
13300  return DAG.getNode(
13301  ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
13302  DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
13303  Src);
13304  }
13305  case ISD::VECREDUCE_FMIN: {
13306  return DAG.getNode(
13307  ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
13308  DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
13309  Src);
13310  }
13311  default:
13312  llvm_unreachable("Unhandled reduction");
13313  }
13314 }
13315 
13316 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
13317  SelectionDAG &DAG) const {
13318  auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
13319  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
13320  return SDValue();
13321 
13322  // LSE has an atomic load-add instruction, but not a load-sub.
13323  SDLoc dl(Op);
13324  MVT VT = Op.getSimpleValueType();
13325  SDValue RHS = Op.getOperand(2);
13326  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
13327  RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
13328  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
13329  Op.getOperand(0), Op.getOperand(1), RHS,
13330  AN->getMemOperand());
13331 }
13332 
13333 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
13334  SelectionDAG &DAG) const {
13335  auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
13336  // No point replacing if we don't have the relevant instruction/libcall anyway
13337  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
13338  return SDValue();
13339 
13340  // LSE has an atomic load-clear instruction, but not a load-and.
13341  SDLoc dl(Op);
13342  MVT VT = Op.getSimpleValueType();
13343  assert(VT != MVT::i128 && "Handled elsewhere, code replicated.");
13344  SDValue RHS = Op.getOperand(2);
13345  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
13346  RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
13347  return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
13348  Op.getOperand(0), Op.getOperand(1), RHS,
13349  AN->getMemOperand());
13350 }
13351 
// Lower the probing part of a Windows dynamic stack allocation: the requested
// size is passed to the stack-probe routine in X15 as a count of 16-byte
// units, and the call uses the Windows stack-probe preserved register mask.
SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
    SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // NOTE(review): the line materializing the probe callee symbol is truncated
  // in this view — confirm against the full source.
      PtrVT, 0);

  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
  if (Subtarget->hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);

  // X15 carries the allocation size divided by 16 (shift right by 4).
  Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
                     DAG.getConstant(4, dl, MVT::i64));
  Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
  Chain =
      // NOTE(review): the call-node construction line is truncated in this
      // view.
      Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
      DAG.getRegisterMask(Mask), Chain.getValue(1));
  // To match the actual intent better, we should read the output from X15 here
  // again (instead of potentially spilling it to the stack), but rereading Size
  // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
  // here.

  // Scale the units-of-16 count back up to bytes for the caller.
  Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
                     DAG.getConstant(4, dl, MVT::i64));
  return Chain;
}
13380 
// Lower DYNAMIC_STACKALLOC (Windows only): either bump SP directly when stack
// probing is disabled, or wrap the allocation in a call sequence that runs
// the stack probe before adjusting SP.
SDValue
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() &&
         "Only Windows alloca probing supported");
  SDLoc dl(Op);
  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Align =
      cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
  EVT VT = Node->getValueType(0);

  // NOTE(review): the condition opening this branch is truncated in this
  // view; it presumably tests the function's "no-stack-arg-probe" attribute.
                             "no-stack-arg-probe")) {
    // No probing requested: decrement SP, align it if asked, and return the
    // new SP plus the updated chain.
    SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
    Chain = SP.getValue(1);
    SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
    if (Align)
      SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
                       DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
    Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
    SDValue Ops[2] = {SP, Chain};
    return DAG.getMergeValues(Ops, dl);
  }

  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);

  // Run the stack probe; Size may be rewritten by the helper.
  Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);

  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
  Chain = SP.getValue(1);
  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
  if (Align)
    SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
                     DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);

  Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);

  SDValue Ops[2] = {SP, Chain};
  return DAG.getMergeValues(Ops, dl);
}
13425 
13426 SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
13427  SelectionDAG &DAG) const {
13428  EVT VT = Op.getValueType();
13429  assert(VT != MVT::i64 && "Expected illegal VSCALE node");
13430 
13431  SDLoc DL(Op);
13432  APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
13433  return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sext(64)), DL,
13434  VT);
13435 }
13436 
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
template <unsigned NumVecs>
static bool
// NOTE(review): the first line of the parameter list (the TLI / DataLayout
// parameters, plus the line deriving EC below) is truncated in this view.
               AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
  Info.opc = ISD::INTRINSIC_VOID;
  // Retrieve EC from first vector argument.
  const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
#ifndef NDEBUG
  // Check the assumption that all input vectors are the same type.
  for (unsigned I = 0; I < NumVecs; ++I)
    assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
           "Invalid type.");
#endif
  // memVT is `NumVecs * VT`.
  Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
                                EC * NumVecs);
  // The pointer being stored to is always the intrinsic's last argument.
  Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
  Info.offset = 0;
  Info.align.reset();
  return true;
}
13461 
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
// NOTE(review): the leading signature line and most `Info.opc` / `Info.flags`
// assignments are truncated in this view; the surviving lines are documented
// as-is.
                                               const CallInst &I,
                                               MachineFunction &MF,
                                               unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();
  switch (Intrinsic) {
  // SVE structured stores share a templated helper parameterized on the
  // number of vectors stored.
  case Intrinsic::aarch64_sve_st2:
    return setInfoSVEStN<2>(*this, DL, Info, I);
  case Intrinsic::aarch64_sve_st3:
    return setInfoSVEStN<3>(*this, DL, Info, I);
  case Intrinsic::aarch64_sve_st4:
    return setInfoSVEStN<4>(*this, DL, Info, I);
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_ld1x2:
  case Intrinsic::aarch64_neon_ld1x3:
  case Intrinsic::aarch64_neon_ld1x4:
  case Intrinsic::aarch64_neon_ld2lane:
  case Intrinsic::aarch64_neon_ld3lane:
  case Intrinsic::aarch64_neon_ld4lane:
  case Intrinsic::aarch64_neon_ld2r:
  case Intrinsic::aarch64_neon_ld3r:
  case Intrinsic::aarch64_neon_ld4r: {
    // Conservatively set memVT to the entire set of vectors loaded.
    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
    Info.offset = 0;
    Info.align.reset();
    // volatile loads with NEON intrinsics not supported
    return true;
  }
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
  case Intrinsic::aarch64_neon_st1x2:
  case Intrinsic::aarch64_neon_st1x3:
  case Intrinsic::aarch64_neon_st1x4:
  case Intrinsic::aarch64_neon_st2lane:
  case Intrinsic::aarch64_neon_st3lane:
  case Intrinsic::aarch64_neon_st4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
    unsigned NumElts = 0;
    // Leading arguments are the stored vectors; stop at the first non-vector
    // (the pointer / lane arguments).
    for (const Value *Arg : I.args()) {
      Type *ArgTy = Arg->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
    Info.offset = 0;
    Info.align.reset();
    // volatile stores with NEON intrinsics not supported
    return true;
  }
  case Intrinsic::aarch64_ldaxr:
  case Intrinsic::aarch64_ldxr: {
    // Exclusive loads: the accessed type comes from the pointer's
    // elementtype attribute.
    Type *ValTy = I.getParamElementType(0);
    Info.memVT = MVT::getVT(ValTy);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(ValTy);
    return true;
  }
  case Intrinsic::aarch64_stlxr:
  case Intrinsic::aarch64_stxr: {
    // Exclusive stores: operand 0 is the value, operand 1 the pointer.
    Type *ValTy = I.getParamElementType(1);
    Info.memVT = MVT::getVT(ValTy);
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(ValTy);
    return true;
  }
  case Intrinsic::aarch64_ldaxp:
  case Intrinsic::aarch64_ldxp:
    // Load-exclusive pair: a single 128-bit access.
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(16);
    return true;
  case Intrinsic::aarch64_stlxp:
  case Intrinsic::aarch64_stxp:
    // Store-exclusive pair: values in operands 0-1, pointer in operand 2.
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = Align(16);
    return true;
  case Intrinsic::aarch64_sve_ldnt1: {
    // SVE non-temporal load; align by element type.
    Type *ElTy = cast<VectorType>(I.getType())->getElementType();
    Info.memVT = MVT::getVT(I.getType());
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(ElTy);
    return true;
  }
  case Intrinsic::aarch64_sve_stnt1: {
    // SVE non-temporal store; the stored vector is operand 0, pointer is
    // operand 2.
    Type *ElTy =
        cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
    Info.memVT = MVT::getVT(I.getOperand(0)->getType());
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(ElTy);
    return true;
  }
  case Intrinsic::aarch64_mops_memset_tag: {
    Value *Dst = I.getArgOperand(0);
    Value *Val = I.getArgOperand(1);
    Info.memVT = MVT::getVT(Val->getType());
    Info.ptrVal = Dst;
    Info.offset = 0;
    Info.align = I.getParamAlign(0).valueOrOne();
    // The size of the memory being operated on is unknown at this point
    return true;
  }
  default:
    break;
  }

  return false;
}
13606 
// AArch64 override of TargetLoweringBase::shouldReduceLoadWidth.
// NOTE(review): the leading signature line is truncated in this view.
                                           ISD::LoadExtType ExtTy,
                                           EVT NewVT) const {
  // TODO: This may be worth removing. Check regression tests for diffs.
  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
    return false;

  // If we're reducing the load width in order to avoid having to use an extra
  // instruction to do extension then it's probably a good idea.
  if (ExtTy != ISD::NON_EXTLOAD)
    return true;
  // Don't reduce load width if it would prevent us from combining a shift into
  // the offset.
  MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
  assert(Mem);
  const SDValue &Base = Mem->getBasePtr();
  // Look for a base of the form (add x, (shl y, C)) with C constant — the
  // pattern that folds into a scaled register-offset addressing mode.
  if (Base.getOpcode() == ISD::ADD &&
      Base.getOperand(1).getOpcode() == ISD::SHL &&
      Base.getOperand(1).hasOneUse() &&
      Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
    // It's unknown whether a scalable vector has a power-of-2 bitwidth.
    if (Mem->getMemoryVT().isScalableVector())
      return false;
    // The shift can be combined if it matches the size of the value being
    // loaded (and so reducing the width would make it not match).
    uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
    uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
    if (ShiftAmount == Log2_32(LoadBytes))
      return false;
  }
  // We have no reason to disallow reducing the load width, so allow it.
  return true;
}
13640 
// Truncations from 64-bit GPR to 32-bit GPR is free.
// NOTE(review): the signature line (isTruncateFree(Type *Ty1, Type *Ty2)) is
// truncated in this view.
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  // Any narrowing of a scalar integer is free on AArch64.
  uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedValue();
  uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedValue();
  return NumBits1 > NumBits2;
}
// NOTE(review): the signature line (isTruncateFree(EVT VT1, EVT VT2)) is
// truncated in this view.
  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  // Free only for scalar-integer narrowing.
  uint64_t NumBits1 = VT1.getFixedSizeInBits();
  uint64_t NumBits2 = VT2.getFixedSizeInBits();
  return NumBits1 > NumBits2;
}
13656 
/// Check if it is profitable to hoist instruction in then/else to if.
/// Not profitable if I and it's user can form a FMA instruction
/// because we prefer FMSUB/FMADD.
// NOTE(review): the signature line and a couple of interior lines (including
// the declaration of `Options`) are truncated in this view.
  // Only an fmul can fuse; anything else is fine to hoist.
  if (I->getOpcode() != Instruction::FMul)
    return true;

  // Fusion requires the fmul to have exactly one user.
  if (!I->hasOneUse())
    return true;

  Instruction *User = I->user_back();

  // Only fadd/fsub users can absorb the fmul as FMADD/FMSUB.
  if (!(User->getOpcode() == Instruction::FSub ||
        User->getOpcode() == Instruction::FAdd))
    return true;

  const Function *F = I->getFunction();
  const DataLayout &DL = F->getParent()->getDataLayout();
  Type *Ty = User->getOperand(0)->getType();

  // Not profitable to hoist exactly when FMA formation is both fast and
  // permitted by the FP fusion options.
  return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
           (Options.AllowFPOpFusion == FPOpFusion::Fast ||
            Options.UnsafeFPMath));
}
13683 
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
// NOTE(review): the signature line (isZExtFree(Type *Ty1, Type *Ty2)) is
// truncated in this view.
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  // Free exactly for the i32 -> i64 case.
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}
// NOTE(review): the signature line (isZExtFree(EVT VT1, EVT VT2)) is
// truncated in this view.
  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  // Free exactly for the scalar i32 -> i64 case.
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}
13700 
// NOTE(review): the signature line (isZExtFree(SDValue Val, EVT VT2)) is
// truncated in this view.
  EVT VT1 = Val.getValueType();
  if (isZExtFree(VT1, VT2)) {
    return true;
  }

  if (Val.getOpcode() != ISD::LOAD)
    return false;

  // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
  return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
          VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
          VT1.getSizeInBits() <= 32);
}
13715 
13716 bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
13717  if (isa<FPExtInst>(Ext))
13718  return false;
13719 
13720  // Vector types are not free.
13721  if (Ext->getType()->isVectorTy())
13722  return false;
13723 
13724  for (const Use &U : Ext->uses()) {
13725  // The extension is free if we can fold it with a left shift in an
13726  // addressing mode or an arithmetic operation: add, sub, and cmp.
13727 
13728  // Is there a shift?
13729  const Instruction *Instr = cast<Instruction>(U.getUser());
13730 
13731  // Is this a constant shift?
13732  switch (Instr->getOpcode()) {
13733  case Instruction::Shl:
13734  if (!isa<ConstantInt>(Instr->getOperand(1)))
13735  return false;
13736  break;
13737  case Instruction::GetElementPtr: {
13738  gep_type_iterator GTI = gep_type_begin(Instr);
13739  auto &DL = Ext->getModule()->getDataLayout();
13740  std::advance(GTI, U.getOperandNo()-1);
13741  Type *IdxTy = GTI.getIndexedType();
13742  // This extension will end up with a shift because of the scaling factor.
13743  // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
13744  // Get the shift amount based on the scaling factor:
13745  // log2(sizeof(IdxTy)) - log2(8).
13746  uint64_t ShiftAmt =
13747  llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) -
13748  3;
13749  // Is the constant foldable in the shift of the addressing mode?
13750  // I.e., shift amount is between 1 and 4 inclusive.
13751  if (ShiftAmt == 0 || ShiftAmt > 4)
13752  return false;
13753  break;
13754  }
13755  case Instruction::Trunc:
13756  // Check if this is a noop.
13757  // trunc(sext ty1 to ty2) to ty1.
13758  if (Instr->getType() == Ext->getOperand(0)->getType())
13759  continue;
13760  [[fallthrough]];
13761  default:
13762  return false;
13763  }
13764 
13765  // At this point we can use the bfm family, so this extension is free
13766  // for that use.
13767  }
13768  return true;
13769 }
13770 
13771 static bool isSplatShuffle(Value *V) {
13772  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
13773  return all_equal(Shuf->getShuffleMask());
13774  return false;
13775 }
13776 
13777 /// Check if both Op1 and Op2 are shufflevector extracts of either the lower
13778 /// or upper half of the vector elements.
13779 static bool areExtractShuffleVectors(Value *Op1, Value *Op2,
13780  bool AllowSplat = false) {
13781  auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
13782  auto *FullTy = FullV->getType();
13783  auto *HalfTy = HalfV->getType();
13784  return FullTy->getPrimitiveSizeInBits().getFixedValue() ==
13785  2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
13786  };
13787 
13788  auto extractHalf = [](Value *FullV, Value *HalfV) {
13789  auto *FullVT = cast<FixedVectorType>(FullV->getType());
13790  auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
13791  return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
13792  };
13793 
13794  ArrayRef<int> M1, M2;
13795  Value *S1Op1 = nullptr, *S2Op1 = nullptr;
13796  if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
13797  !match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
13798  return false;
13799 
13800  // If we allow splats, set S1Op1/S2Op1 to nullptr for the relavant arg so that
13801  // it is not checked as an extract below.
13802  if (AllowSplat && isSplatShuffle(Op1))
13803  S1Op1 = nullptr;
13804  if (AllowSplat && isSplatShuffle(Op2))
13805  S2Op1 = nullptr;
13806 
13807  // Check that the operands are half as wide as the result and we extract
13808  // half of the elements of the input vectors.
13809  if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
13810  (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
13811  return false;
13812 
13813  // Check the mask extracts either the lower or upper half of vector
13814  // elements.
13815  int M1Start = 0;
13816  int M2Start = 0;
13817  int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
13818  if ((S1Op1 &&
13819  !ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start)) ||
13820  (S2Op1 &&
13821  !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start)))
13822  return false;
13823 
13824  if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
13825  (M2Start != 0 && M2Start != (NumElements / 2)))
13826  return false;
13827  if (S1Op1 && S2Op1 && M1Start != M2Start)
13828  return false;
13829 
13830  return true;
13831 }
13832 
13833 /// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
13834 /// of the vector elements.
13835 static bool areExtractExts(Value *Ext1, Value *Ext2) {
13836  auto areExtDoubled = [](Instruction *Ext) {
13837  return Ext->getType()->getScalarSizeInBits() ==
13838  2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
13839  };
13840 
13841  if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
13842  !match(Ext2, m_ZExtOrSExt(m_Value())) ||
13843  !areExtDoubled(cast<Instruction>(Ext1)) ||
13844  !areExtDoubled(cast<Instruction>(Ext2)))
13845  return false;
13846 
13847  return true;
13848 }
13849 
/// Check if Op could be used with vmull_high_p64 intrinsic.
// NOTE(review): the signature line (static bool isOperandOfVmullHighP64(
// Value *Op)) is truncated in this view.
  Value *VectorOperand = nullptr;
  ConstantInt *ElementIndex = nullptr;
  // Matches an extractelement of lane 1 from a 2-element fixed vector — the
  // high half consumed by vmull_high_p64.
  return match(Op, m_ExtractElt(m_Value(VectorOperand),
                                m_ConstantInt(ElementIndex))) &&
         ElementIndex->getValue() == 1 &&
         isa<FixedVectorType>(VectorOperand->getType()) &&
         cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
}
13860 
/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
  // NOTE(review): the body is truncated in this view; presumably it checks
  // isOperandOfVmullHighP64 on both operands — confirm against full source.
}
13865 
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
// NOTE(review): the leading signature line
// (bool AArch64TargetLowering::shouldSinkOperands(...)) is truncated in this
// view.
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::aarch64_neon_smull:
    case Intrinsic::aarch64_neon_umull:
      // Sink both operands when they are low/high-half extract shuffles (or
      // splats) so isel can form smull/umull(2).
      if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1),
                                   /*AllowSplat=*/true)) {
        Ops.push_back(&II->getOperandUse(0));
        Ops.push_back(&II->getOperandUse(1));
        return true;
      }
      [[fallthrough]];

    case Intrinsic::fma:
      // fp16 vector FMA requires +fullfp16; without it, do not sink.
      if (isa<VectorType>(I->getType()) &&
          cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
          !Subtarget->hasFullFP16())
        return false;
      [[fallthrough]];
    case Intrinsic::aarch64_neon_sqdmull:
    case Intrinsic::aarch64_neon_sqdmulh:
    case Intrinsic::aarch64_neon_sqrdmulh:
      // Sink splats for index lane variants
      if (isSplatShuffle(II->getOperand(0)))
        Ops.push_back(&II->getOperandUse(0));
      if (isSplatShuffle(II->getOperand(1)))
        Ops.push_back(&II->getOperandUse(1));
      return !Ops.empty();
    case Intrinsic::aarch64_sve_ptest_first:
    case Intrinsic::aarch64_sve_ptest_last:
      // Sink a ptrue governing predicate next to the ptest user.
      if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
        if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
          Ops.push_back(&II->getOperandUse(0));
      return !Ops.empty();
    case Intrinsic::aarch64_sme_write_horiz:
    case Intrinsic::aarch64_sme_write_vert:
    case Intrinsic::aarch64_sme_writeq_horiz:
    case Intrinsic::aarch64_sme_writeq_vert: {
      // Sink an add feeding the tile-slice index (operand 1) so it can fold
      // into the slice addressing.
      auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
      if (!Idx || Idx->getOpcode() != Instruction::Add)
        return false;
      Ops.push_back(&II->getOperandUse(1));
      return true;
    }
    case Intrinsic::aarch64_sme_read_horiz:
    case Intrinsic::aarch64_sme_read_vert:
    case Intrinsic::aarch64_sme_readq_horiz:
    case Intrinsic::aarch64_sme_readq_vert:
    case Intrinsic::aarch64_sme_ld1b_vert:
    case Intrinsic::aarch64_sme_ld1h_vert:
    case Intrinsic::aarch64_sme_ld1w_vert:
    case Intrinsic::aarch64_sme_ld1d_vert:
    case Intrinsic::aarch64_sme_ld1q_vert:
    case Intrinsic::aarch64_sme_st1b_vert:
    case Intrinsic::aarch64_sme_st1h_vert:
    case Intrinsic::aarch64_sme_st1w_vert:
    case Intrinsic::aarch64_sme_st1d_vert:
    case Intrinsic::aarch64_sme_st1q_vert:
    case Intrinsic::aarch64_sme_ld1b_horiz:
    case Intrinsic::aarch64_sme_ld1h_horiz:
    case Intrinsic::aarch64_sme_ld1w_horiz:
    case Intrinsic::aarch64_sme_ld1d_horiz:
    case Intrinsic::aarch64_sme_ld1q_horiz:
    case Intrinsic::aarch64_sme_st1b_horiz:
    case Intrinsic::aarch64_sme_st1h_horiz:
    case Intrinsic::aarch64_sme_st1w_horiz:
    case Intrinsic::aarch64_sme_st1d_horiz:
    case Intrinsic::aarch64_sme_st1q_horiz: {
      // Same idea as the write variants, but the tile-slice index is
      // operand 3 here.
      auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
      if (!Idx || Idx->getOpcode() != Instruction::Add)
        return false;
      Ops.push_back(&II->getOperandUse(3));
      return true;
    }
    case Intrinsic::aarch64_neon_pmull:
      // pmull benefits only when both operands are half extracts (no splats).
      if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
        return false;
      Ops.push_back(&II->getOperandUse(0));
      Ops.push_back(&II->getOperandUse(1));
      return true;
    case Intrinsic::aarch64_neon_pmull64:
      // pmull64 needs both operands to be lane-1 extracts of 2-element
      // vectors (the vmull_high_p64 shape).
      if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
                                     II->getArgOperand(1)))
        return false;
      Ops.push_back(&II->getArgOperandUse(0));
      Ops.push_back(&II->getArgOperandUse(1));
      return true;
    default:
      return false;
    }
  }

  // Non-intrinsic instructions: only vector arithmetic is interesting.
  if (!I->getType()->isVectorTy())
    return false;

  switch (I->getOpcode()) {
  case Instruction::Sub:
  case Instruction::Add: {
    if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
      return false;

    // If the exts' operands extract either the lower or upper elements, we
    // can sink them too.
    auto Ext1 = cast<Instruction>(I->getOperand(0));
    auto Ext2 = cast<Instruction>(I->getOperand(1));
    if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
      Ops.push_back(&Ext1->getOperandUse(0));
      Ops.push_back(&Ext2->getOperandUse(0));
    }

    Ops.push_back(&I->getOperandUse(0));
    Ops.push_back(&I->getOperandUse(1));

    return true;
  }
  case Instruction::Mul: {
    int NumZExts = 0, NumSExts = 0;
    for (auto &Op : I->operands()) {
      // Make sure we are not already sinking this operand
      if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
        continue;

      if (match(&Op, m_SExt(m_Value()))) {
        NumSExts++;
        continue;
      } else if (match(&Op, m_ZExt(m_Value()))) {
        NumZExts++;
        continue;
      }

      ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);

      // If the Shuffle is a splat and the operand is a zext/sext, sinking the
      // operand and the s/zext can help create indexed s/umull. This is
      // especially useful to prevent i64 mul being scalarized.
      if (Shuffle && isSplatShuffle(Shuffle) &&
          match(Shuffle->getOperand(0), m_ZExtOrSExt(m_Value()))) {
        Ops.push_back(&Shuffle->getOperandUse(0));
        Ops.push_back(&Op);
        if (match(Shuffle->getOperand(0), m_SExt(m_Value())))
          NumSExts++;
        else
          NumZExts++;
        continue;
      }

      if (!Shuffle)
        continue;

      // Otherwise look for the splat-of-inserted-scalar idiom:
      // shuffle(insertelement(_, x, 0), ...).
      Value *ShuffleOperand = Shuffle->getOperand(0);
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
      if (!Insert)
        continue;

      Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
      if (!OperandInstr)
        continue;

      ConstantInt *ElementConstant =
          dyn_cast<ConstantInt>(Insert->getOperand(2));
      // Check that the insertelement is inserting into element 0
      if (!ElementConstant || ElementConstant->getZExtValue() != 0)
        continue;

      unsigned Opcode = OperandInstr->getOpcode();
      if (Opcode == Instruction::SExt)
        NumSExts++;
      else if (Opcode == Instruction::ZExt)
        NumZExts++;
      else {
        // If we find that the top bits are known 0, then we can sink and allow
        // the backend to generate a umull.
        unsigned Bitwidth = I->getType()->getScalarSizeInBits();
        APInt UpperMask = APInt::getHighBitsSet(Bitwidth, Bitwidth / 2);
        const DataLayout &DL = I->getFunction()->getParent()->getDataLayout();
        if (!MaskedValueIsZero(OperandInstr, UpperMask, DL))
          continue;
        NumZExts++;
      }

      Ops.push_back(&Shuffle->getOperandUse(0));
      Ops.push_back(&Op);
    }

    // Is it profitable to sink if we found two of the same type of extends.
    return !Ops.empty() && (NumSExts == 2 || NumZExts == 2);
  }
  default:
    return false;
  }
  return false;
}
14062 
// Replace a fixed-vector zext with a byte-shuffle that interleaves source
// bytes with zeros, which can later be lowered to TBL instructions.
static void createTblShuffleForZExt(ZExtInst *ZExt, bool IsLittleEndian) {
  Value *Op = ZExt->getOperand(0);
  auto *SrcTy = cast<FixedVectorType>(Op->getType());
  auto *DstTy = cast<FixedVectorType>(ZExt->getType());
  auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
  auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
  assert(DstWidth % SrcWidth == 0 &&
         "TBL lowering is not supported for a ZExt instruction with this "
         "source & destination element type.");
  unsigned ZExtFactor = DstWidth / SrcWidth;
  unsigned NumElts = SrcTy->getNumElements();
  IRBuilder<> Builder(ZExt);
  // NOTE(review): the declaration of `Mask` (presumably SmallVector<int>) is
  // truncated in this view.
  // Create a mask that selects <0,...,Op[i]> for each lane of the destination
  // vector to replace the original ZExt. This can later be lowered to a set of
  // tbl instructions.
  for (unsigned i = 0; i < NumElts * ZExtFactor; i++) {
    if (IsLittleEndian) {
      if (i % ZExtFactor == 0)
        Mask.push_back(i / ZExtFactor);
      else
        Mask.push_back(NumElts);
    } else {
      if ((i + 1) % ZExtFactor == 0)
        Mask.push_back((i - ZExtFactor + 1) / ZExtFactor);
      else
        Mask.push_back(NumElts);
    }
  }

  // Mask index NumElts selects from the second shuffle operand, whose first
  // (and only initialized) element is zero — supplying the zero-fill bytes.
  auto *FirstEltZero = Builder.CreateInsertElement(
      PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
  Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
  Result = Builder.CreateBitCast(Result, DstTy);
  ZExt->replaceAllUsesWith(Result);
  ZExt->eraseFromParent();
}
14100 
14101 static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) {
14102  IRBuilder<> Builder(TI);
14103  SmallVector<Value *> Parts;
14104  int NumElements = cast<FixedVectorType>(TI->getType())->getNumElements();
14105  auto *SrcTy = cast<FixedVectorType>(TI->getOperand(0)->getType());
14106  auto *DstTy = cast<FixedVectorType>(TI->getType());
14107  assert(SrcTy->getElementType()->isIntegerTy() &&
14108  "Non-integer type source vector element is not supported");
14109  assert(DstTy->getElementType()->isIntegerTy(8) &&
14110  "Unsupported destination vector element type");
14111  unsigned SrcElemTySz =
14112  cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
14113  unsigned DstElemTySz =
14114  cast<IntegerType>(DstTy->getElementType())->getBitWidth();
14115  assert((SrcElemTySz % DstElemTySz == 0) &&
14116  "Cannot lower truncate to tbl instructions for a source element size "
14117  "that is not divisible by the destination element size");
14118  unsigned TruncFactor = SrcElemTySz / DstElemTySz;
14119  assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
14120  "Unsupported source vector element type size");
14121  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), 16);
14122 
14123  // Create a mask to choose every nth byte from the source vector table of
14124  // bytes to create the truncated destination vector, where 'n' is the truncate
14125  // ratio. For example, for a truncate from Yxi64 to Yxi8, choose
14126  // 0,8,16,..Y*8th bytes for the little-endian format
14127  SmallVector<Constant *, 16> MaskConst;
14128  for (int Itr = 0; Itr < 16; Itr++) {
14129  if (Itr < NumElements)
14130  MaskConst.push_back(Builder.getInt8(
14131  IsLittleEndian ? Itr * TruncFactor
14132  : Itr * TruncFactor + (TruncFactor - 1)));
14133  else
14134  MaskConst.push_back(Builder.getInt8(255));
14135  }
14136 
14137  int MaxTblSz = 128 * 4;
14138  int MaxSrcSz = SrcElemTySz * NumElements;
14139  int ElemsPerTbl =
14140  (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
14141  assert(ElemsPerTbl <= 16 &&
14142  "Maximum elements selected using TBL instruction cannot exceed 16!");
14143 
14144  int ShuffleCount = 128 / SrcElemTySz;
14145  SmallVector<int> ShuffleLanes;
14146  for (int i = 0; i < ShuffleCount; ++i)
14147  ShuffleLanes.push_back(i);
14148 
14149  // Create TBL's table of bytes in 1,2,3 or 4 FP/SIMD registers using shuffles
14150  // over the source vector. If TBL's maximum 4 FP/SIMD registers are saturated,
14151  // call TBL & save the result in a vector of TBL results for combining later.
14153  while (ShuffleLanes.back() < NumElements) {
14154  Parts.push_back(Builder.CreateBitCast(
14155  Builder.CreateShuffleVector(TI->getOperand(0), ShuffleLanes), VecTy));
14156 
14157  if (Parts.size() == 4) {
14158  auto *F = Intrinsic::getDeclaration(TI->getModule(),
14159  Intrinsic::aarch64_neon_tbl4, VecTy);
14160  Parts.push_back(ConstantVector::get(MaskConst));
14161  Results.push_back(Builder.CreateCall(F, Parts));
14162  Parts.clear();
14163  }
14164 
14165  for (int i = 0; i < ShuffleCount; ++i)
14166  ShuffleLanes[i] += ShuffleCount;
14167  }
14168 
14169  assert((Parts.empty() || Results.empty()) &&
14170  "Lowering trunc for vectors requiring different TBL instructions is "
14171  "not supported!");
14172  // Call TBL for the residual table bytes present in 1,2, or 3 FP/SIMD
14173  // registers
14174  if (!Parts.empty()) {
14175  Intrinsic::ID TblID;
14176  switch (Parts.size()) {
14177  case 1:
14178  TblID = Intrinsic::aarch64_neon_tbl1;
14179  break;
14180  case 2:
14181  TblID = Intrinsic::aarch64_neon_tbl2;
14182  break;
14183  case 3:
14184  TblID = Intrinsic::aarch64_neon_tbl3;
14185  break;
14186  }
14187 
14188  auto *F = Intrinsic::getDeclaration(TI->getModule(), TblID, VecTy);
14189  Parts.push_back(ConstantVector::get(MaskConst));
14190  Results.push_back(Builder.CreateCall(F, Parts));
14191  }
14192 
14193  // Extract the destination vector from TBL result(s) after combining them
14194  // where applicable. Currently, at most two TBLs are supported.
14195  assert(Results.size() <= 2 && "Trunc lowering does not support generation of "
14196  "more than 2 tbl instructions!");
14197  Value *FinalResult = Results[0];
14198  if (Results.size() == 1) {
14199  if (ElemsPerTbl < 16) {
14200  SmallVector<int> FinalMask(ElemsPerTbl);
14201  std::iota(FinalMask.begin(), FinalMask.end(), 0);
14202  FinalResult = Builder.CreateShuffleVector(Results[0], FinalMask);
14203  }
14204  } else {
14205  SmallVector<int> FinalMask(ElemsPerTbl * Results.size());
14206  if (ElemsPerTbl < 16) {
14207  std::iota(FinalMask.begin(), FinalMask.begin() + ElemsPerTbl, 0);
14208  std::iota(FinalMask.begin() + ElemsPerTbl, FinalMask.end(), 16);
14209  } else {
14210  std::iota(FinalMask.begin(), FinalMask.end(), 0);
14211  }
14212  FinalResult =
14213  Builder.CreateShuffleVector(Results[0], Results[1], FinalMask);
14214  }
14215 
14216  TI->replaceAllUsesWith(FinalResult);
14217  TI->eraseFromParent();
14218 }
14219 
// Try to rewrite a vector zext / uitofp / fptoui / trunc instruction `I` into
// an IR sequence that later lowers to AArch64 TBL instructions. Returns true
// iff `I` was replaced (and erased).
// NOTE(review): the opening line of this definition (function name and its
// leading parameter, presumably `Instruction *I`) was lost in extraction; the
// visible signature tail also takes the enclosing `Loop *L`.
14221  Loop *L) const {
14222  // shuffle_vector instructions are serialized when targeting SVE,
14223  // see LowerSPLAT_VECTOR. This peephole is not beneficial.
14224  if (Subtarget->useSVEForFixedLengthVectors())
14225  return false;
14226 
14227  // Try to optimize conversions using tbl. This requires materializing constant
14228  // index vectors, which can increase code size and add loads. Skip the
14229  // transform unless the conversion is in a loop block guaranteed to execute
14230  // and we are not optimizing for size.
14231  Function *F = I->getParent()->getParent();
14232  if (!L || L->getHeader() != I->getParent() || F->hasMinSize() ||
14233  F->hasOptSize())
14234  return false;
14235 
14236  auto *SrcTy = dyn_cast<FixedVectorType>(I->getOperand(0)->getType());
14237  auto *DstTy = dyn_cast<FixedVectorType>(I->getType());
14238  if (!SrcTy || !DstTy)
14239  return false;
14240 
14241  // Convert 'zext <Y x i8> %x to <Y x i8X>' to a shuffle that can be
14242  // lowered to tbl instructions to insert the original i8 elements
14243  // into i8x lanes. This is enabled for cases where it is beneficial.
14244  auto *ZExt = dyn_cast<ZExtInst>(I);
14245  if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
14246  auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
14247  if (DstWidth % 8 == 0 && DstWidth > 16 && DstWidth < 64) {
14248  createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
14249  return true;
14250  }
14251  }
14252 
// 'uitofp <Y x i8> to <Y x float>' is split into a zext (handled by the
// tbl-shuffle helper) followed by a plain uitofp on the widened vector.
14253  auto *UIToFP = dyn_cast<UIToFPInst>(I);
14254  if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
14255  DstTy->getElementType()->isFloatTy()) {
// NOTE(review): an IRBuilder declaration (presumably `IRBuilder<> Builder(I);`)
// was lost in extraction before the next line.
14257  auto *ZExt = cast<ZExtInst>(
14258  Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
14259  auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
14260  I->replaceAllUsesWith(UI);
14261  I->eraseFromParent();
14262  createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
14263  return true;
14264  }
14265 
14266  // Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui
14267  // followed by a truncate lowered to using tbl.4.
14268  auto *FPToUI = dyn_cast<FPToUIInst>(I);
14269  if (FPToUI &&
14270  (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
14271  SrcTy->getElementType()->isFloatTy() &&
14272  DstTy->getElementType()->isIntegerTy(8)) {
// NOTE(review): an IRBuilder declaration was likewise lost in extraction here.
14274  auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
14275  VectorType::getInteger(SrcTy));
14276  auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
14277  I->replaceAllUsesWith(TruncI);
14278  I->eraseFromParent();
14279  createTblForTrunc(cast<TruncInst>(TruncI), Subtarget->isLittleEndian());
14280  return true;
14281  }
14282 
14283  // Convert 'trunc <(8|16) x (i32|i64)> %x to <(8|16) x i8>' to an appropriate
14284  // tbl instruction selecting the lowest/highest (little/big endian) 8 bits
14285  // per lane of the input that is represented using 1,2,3 or 4 128-bit table
14286  // registers
14287  auto *TI = dyn_cast<TruncInst>(I);
14288  if (TI && DstTy->getElementType()->isIntegerTy(8) &&
14289  ((SrcTy->getElementType()->isIntegerTy(32) ||
14290  SrcTy->getElementType()->isIntegerTy(64)) &&
14291  (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
14292  createTblForTrunc(TI, Subtarget->isLittleEndian());
14293  return true;
14294  }
14295 
14296  return false;
14297 }
14298 
// Report whether `LoadedType` is eligible for a paired load: only simple
// integer or floating-point types of exactly 32 or 64 bits qualify. On the
// eligible path the required alignment is reported as 1 (unaligned OK).
// NOTE(review): the opening line of this signature (function name and the
// loaded-type parameter) was lost in extraction.
14300  Align &RequiredAligment) const {
14301  if (!LoadedType.isSimple() ||
14302  (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
14303  return false;
14304  // Cyclone supports unaligned accesses.
14305  RequiredAligment = Align(1);
14306  unsigned NumBits = LoadedType.getSizeInBits();
14307  return NumBits == 32 || NumBits == 64;
14308 }
14309 
14310 /// A helper function for determining the number of interleaved accesses we
14311 /// will generate when lowering accesses of the given type.
// NOTE(review): the opening line of this signature was lost in extraction.
14313  VectorType *VecTy, const DataLayout &DL, bool UseScalable) const {
// Each access covers one 128-bit register, or the minimum SVE vector size if
// that is larger and scalable accesses are requested. The +127 biases the
// division upward and the result is clamped to at least one access.
14314  unsigned VecSize = 128;
14315  if (UseScalable)
14316  VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
14317  return std::max<unsigned>(1, (DL.getTypeSizeInBits(VecTy) + 127) / VecSize);
14318 }
14319 
// Target-specific memory-operand flags: instructions tagged with Falkor
// strided-access metadata get the MOStridedAccess flag.
// NOTE(review): the signature line and the fallthrough return (presumably
// `return MachineMemOperand::MONone;`) were lost in extraction.
14322  if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
14323  I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
14324  return MOStridedAccess;
14326 }
14327 
// Decide whether `VecTy` is a legal type for an interleaved (ldN/stN)
// access, and whether the scalable (SVE) intrinsic forms should be used for
// it; that second decision is written through `UseScalable`.
// NOTE(review): the opening line of this signature was lost in extraction.
14329  VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const {
14330 
14331  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
14332  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
14333  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
14334 
14335  UseScalable = false;
14336 
14337  // Ensure that the predicate for this number of elements is available.
14338  if (Subtarget->hasSVE() && !getSVEPredPatternFromNumElements(NumElements))
14339  return false;
14340 
14341  // Ensure the number of vector elements is greater than 1.
14342  if (NumElements < 2)
14343  return false;
14344 
14345  // Ensure the element type is legal.
14346  if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
14347  return false;
14348 
// Use scalable accesses when streaming-compatible SVE is forced, or when
// fixed-length vectors are lowered via SVE and the size is a multiple of
// (or a >128-bit power-of-two fraction of) the minimum SVE register size.
14349  if (Subtarget->forceStreamingCompatibleSVE() ||
14350  (Subtarget->useSVEForFixedLengthVectors() &&
14351  (VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
14352  (VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
14353  isPowerOf2_32(NumElements) && VecSize > 128)))) {
14354  UseScalable = true;
14355  return true;
14356  }
14357 
14358  // Ensure the total vector size is 64 or a multiple of 128. Types larger than
14359  // 128 will be split into multiple interleaved accesses.
14360  return VecSize == 64 || VecSize % 128 == 0;
14361 }
14362 
// Map a fixed-length vector's element type to the scalable vector type whose
// element count fills one 128-bit granule: 2 x 64-bit, 4 x 32-bit, 8 x
// 16-bit, or 16 x 8-bit elements. Unhandled element types are fatal.
// NOTE(review): the opening line of this definition (its name and the
// `FixedVectorType *VTy` parameter) was lost in extraction.
14364  if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
14365  return ScalableVectorType::get(VTy->getElementType(), 2);
14366 
14367  if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
14368  return ScalableVectorType::get(VTy->getElementType(), 4);
14369 
14370  if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
14371  return ScalableVectorType::get(VTy->getElementType(), 8);
14372 
14373  if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
14374  return ScalableVectorType::get(VTy->getElementType(), 8);
14375 
14376  if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
14377  return ScalableVectorType::get(VTy->getElementType(), 2);
14378 
14379  if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
14380  return ScalableVectorType::get(VTy->getElementType(), 4);
14381 
14382  if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
14383  return ScalableVectorType::get(VTy->getElementType(), 8);
14384 
14385  if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
14386  return ScalableVectorType::get(VTy->getElementType(), 16);
14387 
14388  llvm_unreachable("Cannot handle input vector type");
14389 }
14390 
14391 /// Lower an interleaved load into a ldN intrinsic.
14392 ///
14393 /// E.g. Lower an interleaved load (Factor = 2):
14394 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
14395 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
14396 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
14397 ///
14398 /// Into:
14399 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
14400 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
14401 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
// NOTE(review): the first two lines of this signature (function name, the
// LoadInst and the shufflevector-list parameters) were lost in extraction.
14404  ArrayRef<unsigned> Indices, unsigned Factor) const {
14405  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
14406  "Invalid interleave factor");
14407  assert(!Shuffles.empty() && "Empty shufflevector input");
14408  assert(Shuffles.size() == Indices.size() &&
14409  "Unmatched number of shufflevectors and indices");
14410 
14411  const DataLayout &DL = LI->getModule()->getDataLayout();
14412 
14413  VectorType *VTy = Shuffles[0]->getType();
14414 
14415  // Skip if we do not have NEON and skip illegal vector types. We can
14416  // "legalize" wide vector types into multiple interleaved accesses as long as
14417  // the vector types are divisible by 128.
14418  bool UseScalable;
14419  if (!Subtarget->hasNEON() ||
14420  !isLegalInterleavedAccessType(VTy, DL, UseScalable))
14421  return false;
14422 
14423  unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
14424 
14425  auto *FVTy = cast<FixedVectorType>(VTy);
14426 
14427  // A pointer vector can not be the return type of the ldN intrinsics. Need to
14428  // load integer vectors first and then convert to pointer vectors.
14429  Type *EltTy = FVTy->getElementType();
14430  if (EltTy->isPointerTy())
14431  FVTy =
14432  FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
14433 
14434  // If we're going to generate more than one load, reset the sub-vector type
14435  // to something legal.
14436  FVTy = FixedVectorType::get(FVTy->getElementType(),
14437  FVTy->getNumElements() / NumLoads);
14438 
14439  auto *LDVTy =
14440  UseScalable ? cast<VectorType>(getSVEContainerIRType(FVTy)) : FVTy;
14441 
14442  IRBuilder<> Builder(LI);
14443 
14444  // The base address of the load.
14445  Value *BaseAddr = LI->getPointerOperand();
14446 
14447  if (NumLoads > 1) {
14448  // We will compute the pointer operand of each load from the original base
14449  // address using GEPs. Cast the base address to a pointer to the scalar
14450  // element type.
14451  BaseAddr = Builder.CreateBitCast(
14452  BaseAddr,
14453  LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
14454  }
14455 
14456  Type *PtrTy =
14457  UseScalable
14458  ? LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace())
14459  : LDVTy->getPointerTo(LI->getPointerAddressSpace());
14460  Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
14461  LDVTy->getElementCount());
14462 
14463  static const Intrinsic::ID SVELoadIntrs[3] = {
14464  Intrinsic::aarch64_sve_ld2_sret, Intrinsic::aarch64_sve_ld3_sret,
14465  Intrinsic::aarch64_sve_ld4_sret};
14466  static const Intrinsic::ID NEONLoadIntrs[3] = {Intrinsic::aarch64_neon_ld2,
14467  Intrinsic::aarch64_neon_ld3,
14468  Intrinsic::aarch64_neon_ld4};
14469  Function *LdNFunc;
14470  if (UseScalable)
14471  LdNFunc = Intrinsic::getDeclaration(LI->getModule(),
14472  SVELoadIntrs[Factor - 2], {LDVTy});
14473  else
14474  LdNFunc = Intrinsic::getDeclaration(
14475  LI->getModule(), NEONLoadIntrs[Factor - 2], {LDVTy, PtrTy});
14476 
14477  // Holds sub-vectors extracted from the load intrinsic return values. The
14478  // sub-vectors are associated with the shufflevector instructions they will
14479  // replace.
// NOTE(review): the declaration of the `SubVecs` map (used below, presumably
// a DenseMap from ShuffleVectorInst* to a SmallVector of Value*) was lost in
// extraction here.
14481 
14482  Value *PTrue = nullptr;
14483  if (UseScalable) {
// Build the governing predicate for the SVE loads: a pattern matching the
// fixed element count, widened to 'all' when the SVE register size is fixed
// and exactly matches the vector size.
14484  std::optional<unsigned> PgPattern =
14485  getSVEPredPatternFromNumElements(FVTy->getNumElements());
14486  if (Subtarget->getMinSVEVectorSizeInBits() ==
14487  Subtarget->getMaxSVEVectorSizeInBits() &&
14488  Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
14489  PgPattern = AArch64SVEPredPattern::all;
14490 
14491  auto *PTruePat =
14492  ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
14493  PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
14494  {PTruePat});
14495  }
14496 
14497  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
14498 
14499  // If we're generating more than one load, compute the base address of
14500  // subsequent loads as an offset from the previous.
14501  if (LoadCount > 0)
14502  BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
14503  FVTy->getNumElements() * Factor);
14504 
14505  CallInst *LdN;
14506  if (UseScalable)
14507  LdN = Builder.CreateCall(
14508  LdNFunc, {PTrue, Builder.CreateBitCast(BaseAddr, PtrTy)}, "ldN");
14509  else
14510  LdN = Builder.CreateCall(LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy),
14511  "ldN");
14512 
14513  // Extract and store the sub-vectors returned by the load intrinsic.
14514  for (unsigned i = 0; i < Shuffles.size(); i++) {
14515  ShuffleVectorInst *SVI = Shuffles[i];
14516  unsigned Index = Indices[i];
14517 
14518  Value *SubVec = Builder.CreateExtractValue(LdN, Index);
14519 
14520  if (UseScalable)
14521  SubVec = Builder.CreateExtractVector(
14522  FVTy, SubVec,
// NOTE(review): the final argument of this call (presumably a zero i64
// ConstantInt index) was lost in extraction.
14524 
14525  // Convert the integer vector to pointer vector if the element is pointer.
14526  if (EltTy->isPointerTy())
14527  SubVec = Builder.CreateIntToPtr(
14528  SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
14529  FVTy->getNumElements()));
14530 
14531  SubVecs[SVI].push_back(SubVec);
14532  }
14533  }
14534 
14535  // Replace uses of the shufflevector instructions with the sub-vectors
14536  // returned by the load intrinsic. If a shufflevector instruction is
14537  // associated with more than one sub-vector, those sub-vectors will be
14538  // concatenated into a single wide vector.
14539  for (ShuffleVectorInst *SVI : Shuffles) {
14540  auto &SubVec = SubVecs[SVI];
14541  auto *WideVec =
14542  SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
14543  SVI->replaceAllUsesWith(WideVec);
14544  }
14545 
14546  return true;
14547 }
14548 
14549 /// Lower an interleaved store into a stN intrinsic.
14550 ///
14551 /// E.g. Lower an interleaved store (Factor = 3):
14552 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
14553 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
14554 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
14555 ///
14556 /// Into:
14557 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
14558 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
14559 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
14560 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
14561 ///
14562 /// Note that the new shufflevectors will be removed and we'll only generate one
14563 /// st3 instruction in CodeGen.
14564 ///
14565 /// Example for a more general valid mask (Factor 3). Lower:
14566 /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
14567 /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
14568 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
14569 ///
14570 /// Into:
14571 /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
14572 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
14573 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
14574 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
// NOTE(review): the first line of this signature (function name and the
// StoreInst parameter) was lost in extraction.
14576  ShuffleVectorInst *SVI,
14577  unsigned Factor) const {
14578  // Skip if streaming compatible SVE is enabled, because it generates invalid
14579  // code in streaming mode when SVE length is not specified.
14580  if (Subtarget->forceStreamingCompatibleSVE())
14581  return false;
14582 
14583  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
14584  "Invalid interleave factor");
14585 
14586  auto *VecTy = cast<FixedVectorType>(SVI->getType());
14587  assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
14588 
14589  unsigned LaneLen = VecTy->getNumElements() / Factor;
14590  Type *EltTy = VecTy->getElementType();
14591  auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
14592 
14593  const DataLayout &DL = SI->getModule()->getDataLayout();
14594  bool UseScalable;
14595 
14596  // Skip if we do not have NEON and skip illegal vector types. We can
14597  // "legalize" wide vector types into multiple interleaved accesses as long as
14598  // the vector types are divisible by 128.
14599  if (!Subtarget->hasNEON() ||
14600  !isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
14601  return false;
14602 
14603  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
14604 
14605  Value *Op0 = SVI->getOperand(0);
14606  Value *Op1 = SVI->getOperand(1);
// NOTE(review): an IRBuilder declaration (presumably `IRBuilder<> Builder(SI);`)
// was lost in extraction here; `Builder` is used throughout the rest of the
// function.
14608 
14609  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
14610  // vectors to integer vectors.
14611  if (EltTy->isPointerTy()) {
14612  Type *IntTy = DL.getIntPtrType(EltTy);
14613  unsigned NumOpElts =
14614  cast<FixedVectorType>(Op0->getType())->getNumElements();
14615 
14616  // Convert to the corresponding integer vector.
14617  auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
14618  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
14619  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
14620 
14621  SubVecTy = FixedVectorType::get(IntTy, LaneLen);
14622  }
14623 
14624  // If we're going to generate more than one store, reset the lane length
14625  // and sub-vector type to something legal.
14626  LaneLen /= NumStores;
14627  SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
14628 
14629  auto *STVTy = UseScalable ? cast<VectorType>(getSVEContainerIRType(SubVecTy))
14630  : SubVecTy;
14631 
14632  // The base address of the store.
14633  Value *BaseAddr = SI->getPointerOperand();
14634 
14635  if (NumStores > 1) {
14636  // We will compute the pointer operand of each store from the original base
14637  // address using GEPs. Cast the base address to a pointer to the scalar
14638  // element type.
14639  BaseAddr = Builder.CreateBitCast(
14640  BaseAddr,
14641  SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
14642  }
14643 
14644  auto Mask = SVI->getShuffleMask();
14645 
14646  // Sanity check if all the indices are NOT in range.
14647  // If mask is `undef` or `poison`, `Mask` may be a vector of -1s.
14648  // If all of them are `undef`, OOB read will happen later.
14649  if (llvm::all_of(Mask, [](int Idx) { return Idx == UndefMaskElem; })) {
14650  return false;
14651  }
14652  // A 64bit st2 which does not start at element 0 will involved adding extra
14653  // ext elements, making the st2 unprofitable.
14654  if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 && Mask[0] != 0)
14655  return false;
14656 
14657  Type *PtrTy =
14658  UseScalable
14659  ? STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace())
14660  : STVTy->getPointerTo(SI->getPointerAddressSpace());
14661  Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
14662  STVTy->getElementCount());
14663 
14664  static const Intrinsic::ID SVEStoreIntrs[3] = {Intrinsic::aarch64_sve_st2,
14665  Intrinsic::aarch64_sve_st3,
14666  Intrinsic::aarch64_sve_st4};
14667  static const Intrinsic::ID NEONStoreIntrs[3] = {Intrinsic::aarch64_neon_st2,
14668  Intrinsic::aarch64_neon_st3,
14669  Intrinsic::aarch64_neon_st4};
14670  Function *StNFunc;
14671  if (UseScalable)
14672  StNFunc = Intrinsic::getDeclaration(SI->getModule(),
14673  SVEStoreIntrs[Factor - 2], {STVTy});
14674  else
14675  StNFunc = Intrinsic::getDeclaration(
14676  SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});
14677 
14678  Value *PTrue = nullptr;
14679  if (UseScalable) {
// Build the governing predicate for the SVE stores, mirroring the logic in
// the interleaved-load lowering above.
14680  std::optional<unsigned> PgPattern =
14681  getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
14682  if (Subtarget->getMinSVEVectorSizeInBits() ==
14683  Subtarget->getMaxSVEVectorSizeInBits() &&
14684  Subtarget->getMinSVEVectorSizeInBits() ==
14685  DL.getTypeSizeInBits(SubVecTy))
14686  PgPattern = AArch64SVEPredPattern::all;
14687 
14688  auto *PTruePat =
14689  ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
14690  PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
14691  {PTruePat});
14692  }
14693 
14694  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
14695 
// NOTE(review): the declaration of `Ops` (the argument list for the stN call,
// presumably a SmallVector of Value*) was lost in extraction here.
14697 
14698  // Split the shufflevector operands into sub vectors for the new stN call.
14699  for (unsigned i = 0; i < Factor; i++) {
14700  Value *Shuffle;
14701  unsigned IdxI = StoreCount * LaneLen * Factor + i;
14702  if (Mask[IdxI] >= 0) {
14703  Shuffle = Builder.CreateShuffleVector(
14704  Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0));
14705  } else {
14706  unsigned StartMask = 0;
14707  for (unsigned j = 1; j < LaneLen; j++) {
14708  unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
14709  if (Mask[IdxJ] >= 0) {
14710  StartMask = Mask[IdxJ] - j;
14711  break;
14712  }
14713  }
14714  // Note: Filling undef gaps with random elements is ok, since
14715  // those elements were being written anyway (with undefs).
14716  // In the case of all undefs we're defaulting to using elems from 0
14717  // Note: StartMask cannot be negative, it's checked in
14718  // isReInterleaveMask
14719  Shuffle = Builder.CreateShuffleVector(
14720  Op0, Op1, createSequentialMask(StartMask, LaneLen, 0));
14721  }
14722 
14723  if (UseScalable)
14724  Shuffle = Builder.CreateInsertVector(
14725  STVTy, UndefValue::get(STVTy), Shuffle,
14726  ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
14727 
14728  Ops.push_back(Shuffle);
14729  }
14730 
14731  if (UseScalable)
14732  Ops.push_back(PTrue);
14733 
14734  // If we generating more than one store, we compute the base address of
14735  // subsequent stores as an offset from the previous.
14736  if (StoreCount > 0)
14737  BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
14738  BaseAddr, LaneLen * Factor);
14739 
14740  Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
14741  Builder.CreateCall(StNFunc, Ops);
14742  }
14743  return true;
14744 }
14745 
// Pick the widest profitable EVT for expanding a memcpy/memset-style MemOp:
// v16i8 (NEON) for large aligned memsets, f128 for other large aligned ops
// when FP is usable, otherwise i64/i32, and MVT::Other as a fallback.
// NOTE(review): the opening line of this signature was lost in extraction.
14747  const MemOp &Op, const AttributeList &FuncAttributes) const {
// Vector/FP expansion is disallowed under the NoImplicitFloat attribute.
14748  bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
14749  bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
14750  bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
14751  // Only use AdvSIMD to implement memset of 32-byte and above. It would have
14752  // taken one instruction to materialize the v2i64 zero and one store (with
14753  // restrictive addressing mode). Just do i64 stores.
14754  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
// A candidate type is acceptable if the op is suitably aligned, or if the
// target allows fast misaligned accesses of that type.
14755  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
14756  if (Op.isAligned(AlignCheck))
14757  return true;
14758  unsigned Fast;
14759  return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
14760  MachineMemOperand::MONone, &Fast) &&
14761  Fast;
14762  };
14763 
14764  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
14765  AlignmentIsAcceptable(MVT::v16i8, Align(16)))
14766  return MVT::v16i8;
14767  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
14768  return MVT::f128;
14769  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
14770  return MVT::i64;
14771  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
14772  return MVT::i32;
14773  return MVT::Other;
14774 }
14775 
// GlobalISel (LLT) twin of the EVT-based query above: pick the widest
// profitable LLT for expanding a memcpy/memset-style MemOp, with LLT() as
// the "no preference" fallback. The selection logic mirrors the EVT version.
// NOTE(review): the opening line of this signature was lost in extraction.
14777  const MemOp &Op, const AttributeList &FuncAttributes) const {
14778  bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
14779  bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
14780  bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
14781  // Only use AdvSIMD to implement memset of 32-byte and above. It would have
14782  // taken one instruction to materialize the v2i64 zero and one store (with
14783  // restrictive addressing mode). Just do i64 stores.
14784  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
14785  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
14786  if (Op.isAligned(AlignCheck))
14787  return true;
14788  unsigned Fast;
14789  return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
14790  MachineMemOperand::MONone, &Fast) &&
14791  Fast;
14792  };
14793 
14794  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
14795  AlignmentIsAcceptable(MVT::v2i64, Align(16)))
14796  return LLT::fixed_vector(2, 64);
14797  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
14798  return LLT::scalar(128);
14799  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
14800  return LLT::scalar(64);
14801  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
14802  return LLT::scalar(32);
14803  return LLT();
14804 }
14805 
14806 // 12-bit optionally shifted immediates are legal for adds.
// NOTE(review): the signature line (taking the int64_t immediate `Immed`)
// was lost in extraction.
14808  if (Immed == std::numeric_limits<int64_t>::min()) {
// std::abs(INT64_MIN) would be UB; reject it outright.
14809  LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
14810  << ": avoid UB for INT64_MIN\n");
14811  return false;
14812  }
14813  // Same encoding for add/sub, just flip the sign.
14814  Immed = std::abs(Immed);
// Legal forms: a plain 12-bit immediate, or a 12-bit immediate shifted left
// by 12 (low 12 bits clear, fits in 24 bits).
14815  bool IsLegal = ((Immed >> 12) == 0 ||
14816  ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
14817  LLVM_DEBUG(dbgs() << "Is " << Immed
14818  << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
14819  return IsLegal;
14820 }
14821 
14822 // Return false to prevent folding
14823 // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
14824 // if the folding leads to worse code.
// NOTE(review): the signature line of this definition was lost in extraction.
14826  SDValue AddNode, SDValue ConstNode) const {
14827  // Let the DAGCombiner decide for vector types and large types.
14828  const EVT VT = AddNode.getValueType();
14829  if (VT.isVector() || VT.getScalarSizeInBits() > 64)
14830  return true;
14831 
14832  // It is worse if c1 is legal add immediate, while c1*c2 is not
14833  // and has to be composed by at least two instructions.
14834  const ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
14835  const ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
14836  const int64_t C1 = C1Node->getSExtValue();
14837  const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
// NOTE(review): the condition guarding this early return (a line between the
// product above and the `return true` below) was lost in extraction.
14839  return true;
// NOTE(review): the declaration of `Insn` (the expansion-instruction list
// filled by expandMOVImm below) was also lost in extraction here.
14841  // Adapt to the width of a register.
14842  unsigned BitSize = VT.getSizeInBits() <= 32 ? 32 : 64;
// Folding is unprofitable if materializing c1*c2 needs more than one
// MOV-immediate instruction.
14843  AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), BitSize, Insn);
14844  if (Insn.size() > 1)
14845  return false;
14846 
14847  // Default to true and let the DAGCombiner decide.
14848  return true;
14849 }
14850 
14851 // Integer comparisons are implemented with ADDS/SUBS, so the range of valid
14852 // immediates is the same as for an add or a sub.
// NOTE(review): the signature line (taking the int64_t immediate `Immed`)
// was lost in extraction.
14854  return isLegalAddImmediate(Immed);
14855 }
14856 
14857 /// isLegalAddressingMode - Return true if the addressing mode represented
14858 /// by AM is legal for this target, for a load/store of the specified type.
// NOTE(review): the first line of this signature (function name and the
// DataLayout parameter `DL` used below) was lost in extraction.
14860  const AddrMode &AM, Type *Ty,
14861  unsigned AS, Instruction *I) const {
14862  // AArch64 has five basic addressing modes:
14863  // reg
14864  // reg + 9-bit signed offset
14865  // reg + SIZE_IN_BYTES * 12-bit unsigned offset
14866  // reg1 + reg2
14867  // reg + SIZE_IN_BYTES * reg
14868 
14869  // No global is ever allowed as a base.
14870  if (AM.BaseGV)
14871  return false;
14872 
14873  // No reg+reg+imm addressing.
14874  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
14875  return false;
14876 
14877  // FIXME: Update this method to support scalable addressing modes.
14878  if (isa<ScalableVectorType>(Ty)) {
// Scalable vectors: only reg, or reg + (element size in bytes) * reg.
14879  uint64_t VecElemNumBytes =
14880  DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
14881  return AM.HasBaseReg && !AM.BaseOffs &&
14882  (AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
14883  }
14884 
14885  // check reg + imm case:
14886  // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
14887  uint64_t NumBytes = 0;
14888  if (Ty->isSized()) {
14889  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
14890  NumBytes = NumBits / 8;
// Non-power-of-two sizes cannot use the scaled-offset forms below.
14891  if (!isPowerOf2_64(NumBits))
14892  NumBytes = 0;
14893  }
14894 
14895  if (!AM.Scale) {
14896  int64_t Offset = AM.BaseOffs;
14897 
14898  // 9-bit signed offset
14899  if (isInt<9>(Offset))
14900  return true;
14901 
14902  // 12-bit unsigned offset
14903  unsigned shift = Log2_64(NumBytes);
14904  if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
14905  // Must be a multiple of NumBytes (NumBytes is a power of 2)
14906  (Offset >> shift) << shift == Offset)
14907  return true;
14908  return false;
14909  }
14910 
14911  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
14912 
14913  return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
14914 }
14915 
// Always allow splitting large GEP offsets.
// NOTE(review): the signature line of this definition was lost in extraction.
14917  // Consider splitting large offset of struct or array.
14918  return true;
14919 }
14920 
// FMA-vs-(fmul+fadd) profitability for MVT scalar types: true for f32/f64
// unconditionally, and for f16 only with full FP16 support; everything else
// (including non-simple types) is false.
// NOTE(review): the opening line of this signature was lost in extraction.
14922  const MachineFunction &MF, EVT VT) const {
14923  VT = VT.getScalarType();
14924 
14925  if (!VT.isSimple())
14926  return false;
14927 
14928  switch (VT.getSimpleVT().SimpleTy) {
14929  case MVT::f16:
14930  return Subtarget->hasFullFP16();
14931  case MVT::f32:
14932  case MVT::f64:
14933  return true;
14934  default:
14935  break;
14936  }
14937 
14938  return false;
14939 }
14940 
// IR-type overload of the FMA profitability query: true only for float and
// double scalar element types.
// NOTE(review): the opening line of this signature was lost in extraction.
14942  Type *Ty) const {
14943  switch (Ty->getScalarType()->getTypeID()) {
14944  case Type::FloatTyID:
14945  case Type::DoubleTyID:
14946  return true;
14947  default:
14948  return false;
14949  }
14950 }
14951 
// Allow the machine combiner to form FMAs only at aggressive optimization
// levels and never for scalable vector types.
// NOTE(review): the signature line and the final operand of this return
// expression (one more condition after the trailing '&&') were lost in
// extraction.
14953  EVT VT, CodeGenOpt::Level OptLevel) const {
14954  return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector() &&
14956 }
14957 
const MCPhysReg *
// NOTE(review): the line with the member-function name was lost in extraction
// (presumably AArch64TargetLowering::getScratchRegisters) — confirm upstream.
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints.
  static const MCPhysReg ScratchRegs[] = {
    AArch64::X16, AArch64::X17, AArch64::LR, 0
  };
  return ScratchRegs;
}
14968 
// NOTE(review): the signature line was lost in extraction (presumably
// AArch64TargetLowering::getRoundingControlRegisters) — confirm upstream.
// FPCR holds the floating-point rounding-mode state.
  static const MCPhysReg RCRegs[] = {AArch64::FPCR};
  return RCRegs;
}
14973 
bool
// NOTE(review): the line naming this member function was lost in extraction;
// from the assert and parameters this is presumably
// AArch64TargetLowering::isDesirableToCommuteWithShift — confirm upstream.
    CombineLevel Level) const {
  assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) &&
         "Expected shift op");

  SDValue ShiftLHS = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
  // combine it with shift 'N' to let it be lowered to UBFX except:
  // ((x >> C) & mask) << C.
  if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
      isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
    uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1);
    // Only a contiguous low-bit mask can come from a UBFX-style extract.
    if (isMask_64(TruncMask)) {
      SDValue AndLHS = ShiftLHS.getOperand(0);
      if (AndLHS.getOpcode() == ISD::SRL) {
        if (auto *SRLC = dyn_cast<ConstantSDNode>(AndLHS.getOperand(1))) {
          // Commuting is only desirable when the SHL exactly undoes the SRL,
          // i.e. ((x >> C) & mask) << C.
          if (N->getOpcode() == ISD::SHL)
            if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1)))
              return SRLC->getZExtValue() == SHLC->getZExtValue();
          return false;
        }
      }
    }
  }
  return true;
}
15004 
// NOTE(review): the member-function name line was lost in extraction;
// presumably AArch64TargetLowering::isDesirableToCommuteXorWithShift —
// confirm upstream.
    const SDNode *N) const {
  assert(N->getOpcode() == ISD::XOR &&
         (N->getOperand(0).getOpcode() == ISD::SHL ||
          N->getOperand(0).getOpcode() == ISD::SRL) &&
         "Expected XOR(SHIFT) pattern");

  // Only commute if the entire NOT mask is a hidden shifted mask.
  auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
  if (XorC && ShiftC) {
    unsigned MaskIdx, MaskLen;
    if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
      unsigned ShiftAmt = ShiftC->getZExtValue();
      unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
      // For SHL the mask must cover exactly the bits surviving the shift;
      // for SRL it must be the corresponding low-bit mask.
      if (N->getOperand(0).getOpcode() == ISD::SHL)
        return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
      return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
    }
  }

  return false;
}
15028 
// NOTE(review): the member-function name line was lost in extraction;
// presumably AArch64TargetLowering::shouldFoldConstantShiftPairToMask —
// confirm upstream.
    const SDNode *N, CombineLevel Level) const {
  assert(((N->getOpcode() == ISD::SHL &&
           N->getOperand(0).getOpcode() == ISD::SRL) ||
          (N->getOpcode() == ISD::SRL &&
           N->getOperand(0).getOpcode() == ISD::SHL)) &&
         "Expected shift-shift mask");
  // Don't allow multiuse shift folding with the same shift amount.
  if (!N->getOperand(0)->hasOneUse())
    return false;

  // Only fold srl(shl(x,c1),c2) iff C1 >= C2 to prevent loss of UBFX patterns.
  EVT VT = N->getValueType(0);
  if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
  }

  return true;
}
15050 
// NOTE(review): the member-function name line was lost in extraction;
// presumably AArch64TargetLowering::shouldConvertConstantLoadToIntImm
// (materializing 'Imm' via MOVZ/MOVN + MOVKs) — confirm upstream.
    Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return false;

  int64_t Val = Imm.getSExtValue();
  // Zero or a valid logical immediate is always cheap to materialize.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
    return true;

  // Negative values materialize via MOVN, so count chunks of ~Val instead.
  if ((int64_t)Val < 0)
    Val = ~Val;
  if (BitSize == 32)
    Val &= (1LL << 32) - 1;

  // Shift is the index of the highest non-zero 16-bit chunk, i.e. the number
  // of MOVKs needed after the initial MOVZ/MOVN.
  unsigned Shift = llvm::Log2_64((uint64_t)Val) / 16;
  // MOVZ is free so return true for one or fewer MOVK.
  return Shift < 3;
}
15072 
// NOTE(review): the member-function name line AND the guard condition
// preceding the first return were lost in extraction (presumably
// AArch64TargetLowering::isExtractSubvectorCheap with a legality check on
// EXTRACT_SUBVECTOR); as shown the second return is unreachable — restore
// from upstream.
    unsigned Index) const {
  return false;

  return (Index == 0 || Index == ResVT.getVectorMinNumElements());
}
15080 
/// Turn vector tests of the signbit in the form of:
///   xor (sra X, elt_size(X)-1), -1
/// into:
///   cmge X, X, #0
// NOTE(review): the signature line was lost in extraction (presumably
// static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
// const AArch64Subtarget *Subtarget)) — confirm upstream.
    const AArch64Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  if (!Subtarget->hasNEON() || !VT.isVector())
    return SDValue();

  // There must be a shift right algebraic before the xor, and the xor must be a
  // 'not' operation.
  SDValue Shift = N->getOperand(0);
  SDValue Ones = N->getOperand(1);
  // NOTE(review): the final clause of this condition (the all-ones check on
  // 'Ones' that makes the xor a NOT) was lost in extraction.
  if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
    return SDValue();

  // The shift should be smearing the sign bit across each vector element.
  auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
  if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
    return SDValue();

  return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
}
15107 
// Given a vecreduce_add node, detect the below pattern and convert it to the
// node sequence with UABDL, [S|U]ADB and UADDLP.
//
// i32 vecreduce_add(
//  v16i32 abs(
//   v16i32 sub(
//    v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
// =================>
// i32 vecreduce_add(
//  v4i32 UADDLP(
//   v8i16 add(
//    v8i16 zext(
//     v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
//    v8i16 zext(
//     v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
// NOTE(review): the signature line was lost in extraction (presumably
// static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
// SelectionDAG &DAG)) — confirm upstream.
    SelectionDAG &DAG) {
  // Assumed i32 vecreduce_add
  if (N->getValueType(0) != MVT::i32)
    return SDValue();

  SDValue VecReduceOp0 = N->getOperand(0);
  unsigned Opcode = VecReduceOp0.getOpcode();
  // Assumed v16i32 abs
  if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
    return SDValue();

  SDValue ABS = VecReduceOp0;
  // Assumed v16i32 sub
  if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
      ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
    return SDValue();

  SDValue SUB = ABS->getOperand(0);
  unsigned Opcode0 = SUB->getOperand(0).getOpcode();
  unsigned Opcode1 = SUB->getOperand(1).getOpcode();
  // Assumed v16i32 type
  if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
      SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
    return SDValue();

  // Assumed zext or sext
  bool IsZExt = false;
  if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
    IsZExt = true;
  } else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
    IsZExt = false;
  } else
    return SDValue();

  SDValue EXT0 = SUB->getOperand(0);
  SDValue EXT1 = SUB->getOperand(1);
  // Assumed zext's operand has v16i8 type
  if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
      EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
    return SDValue();

  // Pattern is dectected. Let's convert it to sequence of nodes.
  SDLoc DL(N);

  // First, create the node pattern of UABD/SABD.
  // NOTE(review): the EXTRACT_SUBVECTOR getNode lines feeding these two
  // high-half operands were lost in extraction.
  SDValue UABDHigh8Op0 =
                  DAG.getConstant(8, DL, MVT::i64));
  SDValue UABDHigh8Op1 =
                  DAG.getConstant(8, DL, MVT::i64));
  SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
                                  UABDHigh8Op0, UABDHigh8Op1);
  SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);

  // Second, create the node pattern of UABAL.
  // NOTE(review): likewise, the EXTRACT_SUBVECTOR lines for the low-half
  // operands are missing here.
  SDValue UABDLo8Op0 =
                  DAG.getConstant(0, DL, MVT::i64));
  SDValue UABDLo8Op1 =
                  DAG.getConstant(0, DL, MVT::i64));
  SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
                                UABDLo8Op0, UABDLo8Op1);
  SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
  SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);

  // Third, create the node of UADDLP.
  // NOTE(review): the line defining 'UADDLP' (an AArch64ISD::UADDLP node over
  // UABAL) was lost in extraction.

  // Fourth, create the node of VECREDUCE_ADD.
  return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
}
15197 
// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
//   vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
//   vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
// If we have vectors larger than v16i8 we extract v16i8 vectors,
// Follow the same steps above to get DOT instructions concatenate them
// and generate vecreduce.add(concat_vector(DOT, DOT2, ..)).
// NOTE(review): the signature line was lost in extraction (presumably
// static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
// const AArch64Subtarget *ST)) — confirm upstream.
    const AArch64Subtarget *ST) {
  // NOTE(review): the early 'return SDValue();' guarded by this check was
  // lost in extraction.
  if (!ST->hasDotProd())

  SDValue Op0 = N->getOperand(0);
  // NOTE(review): the second clause of this condition (the element-type check
  // on Op0) was lost in extraction.
  if (N->getValueType(0) != MVT::i32 ||
    return SDValue();

  unsigned ExtOpcode = Op0.getOpcode();
  SDValue A = Op0;
  SDValue B;
  // For the MLA form, peel the mul and require both factors to be the same
  // kind of extend over the same source type.
  if (ExtOpcode == ISD::MUL) {
    A = Op0.getOperand(0);
    B = Op0.getOperand(1);
    if (A.getOpcode() != B.getOpcode() ||
        A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
      return SDValue();
    ExtOpcode = A.getOpcode();
  }
  if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
    return SDValue();

  EVT Op0VT = A.getOperand(0).getValueType();
  bool IsValidElementCount = Op0VT.getVectorNumElements() % 8 == 0;
  bool IsValidSize = Op0VT.getScalarSizeInBits() == 8;
  if (!IsValidElementCount || !IsValidSize)
    return SDValue();

  SDLoc DL(Op0);
  // For non-mla reductions B can be set to 1. For MLA we take the operand of
  // the extend B.
  if (!B)
    B = DAG.getConstant(1, DL, Op0VT);
  else
    B = B.getOperand(0);

  unsigned IsMultipleOf16 = Op0VT.getVectorNumElements() % 16 == 0;
  unsigned NumOfVecReduce;
  EVT TargetType;
  if (IsMultipleOf16) {
    NumOfVecReduce = Op0VT.getVectorNumElements() / 16;
    TargetType = MVT::v4i32;
  } else {
    NumOfVecReduce = Op0VT.getVectorNumElements() / 8;
    TargetType = MVT::v2i32;
  }
  // NOTE(review): the ternary selecting UDOT vs. SDOT from ExtOpcode was lost
  // in extraction.
  auto DotOpcode =
  // Handle the case where we need to generate only one Dot operation.
  if (NumOfVecReduce == 1) {
    SDValue Zeros = DAG.getConstant(0, DL, TargetType);
    SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
                              A.getOperand(0), B);
    return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
  }
  // Generate Dot instructions that are multiple of 16.
  unsigned VecReduce16Num = Op0VT.getVectorNumElements() / 16;
  SmallVector<SDValue, 4> SDotVec16;
  unsigned I = 0;
  for (; I < VecReduce16Num; I += 1) {
    SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32);
    SDValue Op0 =
        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, A.getOperand(0),
                    DAG.getConstant(I * 16, DL, MVT::i64));
    // NOTE(review): the line defining 'Op1' (the matching extract of B) was
    // lost in extraction.
                    DAG.getConstant(I * 16, DL, MVT::i64));
    SDValue Dot =
        DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros, Op0, Op1);
    SDotVec16.push_back(Dot);
  }
  // Concatenate dot operations.
  EVT SDot16EVT =
      EVT::getVectorVT(*DAG.getContext(), MVT::i32, 4 * VecReduce16Num);
  SDValue ConcatSDot16 =
      DAG.getNode(ISD::CONCAT_VECTORS, DL, SDot16EVT, SDotVec16);
  SDValue VecReduceAdd16 =
      DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), ConcatSDot16);
  unsigned VecReduce8Num = (Op0VT.getVectorNumElements() % 16) / 8;
  if (VecReduce8Num == 0)
    return VecReduceAdd16;

  // Generate the remainder Dot operation that is multiple of 8.
  SmallVector<SDValue, 4> SDotVec8;
  SDValue Zeros = DAG.getConstant(0, DL, MVT::v2i32);
  SDValue Vec8Op0 =
      DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, A.getOperand(0),
                  DAG.getConstant(I * 16, DL, MVT::i64));
  // NOTE(review): the line defining 'Vec8Op1' (the matching extract of B) was
  // lost in extraction.
                  DAG.getConstant(I * 16, DL, MVT::i64));
  SDValue Dot =
      DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros, Vec8Op0, Vec8Op1);
  // NOTE(review): "VecReudceAdd8" is an upstream variable-name typo, kept
  // as-is.
  SDValue VecReudceAdd8 =
      DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
  return DAG.getNode(ISD::ADD, DL, N->getValueType(0), VecReduceAdd16,
                     VecReudceAdd8);
}
15302 
15303 // Given an (integer) vecreduce, we know the order of the inputs does not
15304 // matter. We can convert UADDV(add(zext(extract_lo(x)), zext(extract_hi(x))))
15305 // into UADDV(UADDLP(x)). This can also happen through an extra add, where we
15306 // transform UADDV(add(y, add(zext(extract_lo(x)), zext(extract_hi(x))))).
15308  auto DetectAddExtract = [&](SDValue A) {
15309  // Look for add(zext(extract_lo(x)), zext(extract_hi(x))), returning
15310  // UADDLP(x) if found.
15311  if (A.getOpcode() != ISD::ADD)
15312  return SDValue();
15313  EVT VT = A.getValueType();
15314  SDValue Op0 = A.getOperand(0);
15315  SDValue Op1 = A.getOperand(1);
15316  if (Op0.getOpcode() != Op0.getOpcode() ||
15317  (Op0.getOpcode() != ISD::ZERO_EXTEND &&
15318  Op0.getOpcode() != ISD::SIGN_EXTEND))
15319  return SDValue();
15320  SDValue Ext0 = Op0.getOperand(0);
15321  SDValue Ext1 = Op1.getOperand(0);
15322  if (Ext0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
15323  Ext1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
15324  Ext0.getOperand(0) != Ext1.getOperand(0))
15325  return SDValue();
15326  // Check that the type is twice the add types, and the extract are from
15327  // upper/lower parts of the same source.
15328  if (Ext0.getOperand(0).getValueType().getVectorNumElements() !=
15329  VT.getVectorNumElements() * 2)
15330  return SDValue();
15331  if ((Ext0.getConstantOperandVal(1) != 0 &&
15332  Ext1.getConstantOperandVal(1) != VT.getVectorNumElements()) &&
15333  (Ext1.getConstantOperandVal(1) != 0 &&
15335  return SDValue();
15336  unsigned Opcode = Op0.getOpcode() == ISD::ZERO_EXTEND ? AArch64ISD::UADDLP
15338  return DAG.getNode(Opcode, SDLoc(A), VT, Ext0.getOperand(0));
15339  };
15340 
15341  if (SDValue R = DetectAddExtract(A))
15342  return R;
15343 
15344  if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
15345  if (SDValue R = performUADDVAddCombine(A.getOperand(0), DAG))
15346  return DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
15347  A.getOperand(1));
15348  if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
15349  if (SDValue R = performUADDVAddCombine(A.getOperand(1), DAG))
15350  return DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
15351  A.getOperand(0));
15352  return SDValue();
15353 }
15354 
// NOTE(review): the signature line was lost in extraction (presumably
// static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG)) —
// confirm upstream.
  SDValue A = N->getOperand(0);
  // A UADDV over an add of extended halves can fold the add into UADDLP.
  if (A.getOpcode() == ISD::ADD)
    if (SDValue R = performUADDVAddCombine(A, DAG))
      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
  return SDValue();
}
15362 
// NOTE(review): the signature lines were lost in extraction (presumably
// static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
// TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget))
// — confirm upstream.
    const AArch64Subtarget *Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // Try to fold xor(vashr(x, bw-1), -1) into cmge(x, #0).
  return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
}
15371 
/// Build an optimized lowering for signed division by a power of two:
/// add (N0 < 0 ? Pow2-1 : 0) via CSEL, arithmetic-shift right, and negate the
/// result for negative divisors. Returns SDValue(N, 0) to keep the SDIV when
/// division is cheap or the type is handled later (scalable / SVE
/// fixed-length), and SDValue() to fall back to the generic expansion.
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                     SelectionDAG &DAG,
                                     SmallVectorImpl<SDNode *> &Created) const {
  // NOTE(review): the line defining 'Attr' (the caller function's attributes
  // queried from the MachineFunction) was lost in extraction.
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N,0); // Lower SDIV as SDIV

  EVT VT = N->getValueType(0);

  // For scalable and fixed types, mark them as cheap so we can handle it much
  // later. This allows us to handle larger than legal types.
  if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
    return SDValue(N, 0);

  // fold (sdiv X, pow2)
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  unsigned Lg2 = Divisor.countTrailingZeros();
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);

  // Add (N0 < 0) ? Pow2 - 1 : 0;
  SDValue CCVal;
  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);

  // Record the new nodes so the DAG combiner can revisit them.
  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CSel.getNode());

  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));

  // If we're dividing by a positive value, we're done. Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
15420 
/// Build an optimized lowering for signed remainder by a power of two using
/// AND of the low bits and a conditional negate (CSNEG). Returns
/// SDValue(N, 0) to keep the SREM when division is cheap or the type is
/// handled later, and SDValue() to fall back to the generic expansion.
SDValue
AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
                                     SelectionDAG &DAG,
                                     SmallVectorImpl<SDNode *> &Created) const {
  // NOTE(review): the line defining 'Attr' (the caller function's attributes)
  // was lost in extraction.
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SREM as SREM

  EVT VT = N->getValueType(0);

  // For scalable and fixed types, mark them as cheap so we can handle it much
  // later. This allows us to handle larger than legal types.
  if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
    return SDValue(N, 0);

  // fold (srem X, pow2)
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
    return SDValue();

  unsigned Lg2 = Divisor.countTrailingZeros();
  // |Divisor| == 1: remainder is always zero; let the generic path handle it.
  if (Lg2 == 0)
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue CCVal, CSNeg;
  if (Lg2 == 1) {
    // X % 2 == (X & 1) negated when X < 0.
    SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETGE, CCVal, DAG, DL);
    SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
    CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);

    Created.push_back(Cmp.getNode());
    Created.push_back(And.getNode());
  } else {
    // General case: compute the masked value of both X and -X, then select
    // (and negate) based on the sign of X from the SUBS flags.
    // NOTE(review): this inner 'CCVal' intentionally shadows the outer one.
    SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC);
    SDVTList VTs = DAG.getVTList(VT, MVT::i32);

    SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0);
    SDValue AndPos = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
    SDValue AndNeg = DAG.getNode(ISD::AND, DL, VT, Negs, Pow2MinusOne);
    CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
                        Negs.getValue(1));

    Created.push_back(Negs.getNode());
    Created.push_back(AndPos.getNode());
    Created.push_back(AndNeg.getNode());
  }

  return CSNeg;
}
15474 
15475 static std::optional<unsigned> IsSVECntIntrinsic(SDValue S) {
15476  switch(getIntrinsicID(S.getNode())) {
15477  default:
15478  break;
15479  case Intrinsic::aarch64_sve_cntb:
15480  return 8;
15481  case Intrinsic::aarch64_sve_cnth:
15482  return 16;
15483  case Intrinsic::aarch64_sve_cntw:
15484  return 32;
15485  case Intrinsic::aarch64_sve_cntd:
15486  return 64;
15487  }
15488  return {};
15489 }
15490 
/// Calculates what the pre-extend type is, based on the extension
/// operation node provided by \p Extend.
///
/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
/// pre-extend type is pulled directly from the operand, while other extend
/// operations need a bit more inspection to get this information.
///
/// \param Extend The SDNode from the DAG that represents the extend operation
///
/// \returns The type representing the \p Extend source type, or \p MVT::Other
/// if no valid type can be determined
// NOTE(review): the signature line was lost in extraction (presumably
// static EVT calculatePreExtendType(SDValue Extend)) — confirm upstream.
  switch (Extend.getOpcode()) {
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return Extend.getOperand(0).getValueType();
  case ISD::AssertSext:
  case ISD::AssertZext:
  case ISD::SIGN_EXTEND_INREG: {
    // These carry the narrow type as a VT operand.
    VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
    if (!TypeNode)
      return MVT::Other;
    return TypeNode->getVT();
  }
  case ISD::AND: {
    // NOTE(review): the declaration line of 'Constant' was lost in
    // extraction.
        dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
    if (!Constant)
      return MVT::Other;

    uint32_t Mask = Constant->getZExtValue();

    // A full low-byte/halfword/word mask acts as an unsigned pre-extend.
    if (Mask == UCHAR_MAX)
      return MVT::i8;
    else if (Mask == USHRT_MAX)
      return MVT::i16;
    else if (Mask == UINT_MAX)
      return MVT::i32;

    return MVT::Other;
  }
  default:
    return MVT::Other;
  }
}
15536 
/// Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern
/// into sext/zext(buildvector) or sext/zext(shuffle) making use of the vector
/// SExt/ZExt rather than the scalar SExt/ZExt
// NOTE(review): the signature line was lost in extraction (presumably
// static SDValue performBuildShuffleExtendCombine(SDValue BV,
// SelectionDAG &DAG)) — confirm upstream.
  EVT VT = BV.getValueType();
  // NOTE(review): the second clause of this condition (the VECTOR_SHUFFLE
  // opcode check) was lost in extraction.
  if (BV.getOpcode() != ISD::BUILD_VECTOR &&
    return SDValue();

  // Use the first item in the buildvector/shuffle to get the size of the
  // extend, and make sure it looks valid.
  SDValue Extend = BV->getOperand(0);
  unsigned ExtendOpcode = Extend.getOpcode();
  bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
                ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
                ExtendOpcode == ISD::AssertSext;
  if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
      ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
    return SDValue();
  // Shuffle inputs are vector, limit to SIGN_EXTEND and ZERO_EXTEND to ensure
  // calculatePreExtendType will work without issue.
  if (BV.getOpcode() == ISD::VECTOR_SHUFFLE &&
      ExtendOpcode != ISD::SIGN_EXTEND && ExtendOpcode != ISD::ZERO_EXTEND)
    return SDValue();

  // Restrict valid pre-extend data type
  EVT PreExtendType = calculatePreExtendType(Extend);
  if (PreExtendType == MVT::Other ||
      PreExtendType.getScalarSizeInBits() != VT.getScalarSizeInBits() / 2)
    return SDValue();

  // Make sure all other operands are equally extended
  for (SDValue Op : drop_begin(BV->ops())) {
    if (Op.isUndef())
      continue;
    unsigned Opc = Op.getOpcode();
    bool OpcIsSExt = Opc == ISD::SIGN_EXTEND || Opc == ISD::SIGN_EXTEND_INREG ||
                     Opc == ISD::AssertSext;
    if (OpcIsSExt != IsSExt || calculatePreExtendType(Op) != PreExtendType)
      return SDValue();
  }

  // Rebuild the narrow vector, then apply a single vector-wide extend.
  SDValue NBV;
  SDLoc DL(BV);
  if (BV.getOpcode() == ISD::BUILD_VECTOR) {
    EVT PreExtendVT = VT.changeVectorElementType(PreExtendType);
    EVT PreExtendLegalType =
        PreExtendType.getScalarSizeInBits() < 32 ? MVT::i32 : PreExtendType;
    SmallVector<SDValue, 8> NewOps;
    for (SDValue Op : BV->ops())
      NewOps.push_back(Op.isUndef() ? DAG.getUNDEF(PreExtendLegalType)
                                    : DAG.getAnyExtOrTrunc(Op.getOperand(0), DL,
                                                           PreExtendLegalType));
    NBV = DAG.getNode(ISD::BUILD_VECTOR, DL, PreExtendVT, NewOps);
  } else { // BV.getOpcode() == ISD::VECTOR_SHUFFLE
    EVT PreExtendVT = VT.changeVectorElementType(PreExtendType.getScalarType());
    NBV = DAG.getVectorShuffle(PreExtendVT, DL, BV.getOperand(0).getOperand(0),
                               BV.getOperand(1).isUndef()
                                   ? DAG.getUNDEF(PreExtendVT)
                                   : BV.getOperand(1).getOperand(0),
                               cast<ShuffleVectorSDNode>(BV)->getMask());
  }
  return DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, NBV);
}
15601 
/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
// NOTE(review): the signature line was lost in extraction (presumably
// static SDValue performMulVectorExtendCombine(SDNode *Mul,
// SelectionDAG &DAG)) — confirm upstream.
  // If the value type isn't a vector, none of the operands are going to be dups
  EVT VT = Mul->getValueType(0);
  if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
    return SDValue();

  // NOTE(review): the lines computing 'Op0' and 'Op1' (the
  // performBuildShuffleExtendCombine calls on each mul operand) were lost in
  // extraction.

  // Neither operands have been changed, don't make any further changes
  if (!Op0 && !Op1)
    return SDValue();

  SDLoc DL(Mul);
  return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
                     Op1 ? Op1 : Mul->getOperand(1));
}
15621 
// Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
// Same for other types with equivalent constants.
// NOTE(review): the signature line was lost in extraction (presumably
// static SDValue performMulVectorCmpZeroCombine(SDNode *N,
// SelectionDAG &DAG)) — confirm upstream.
  EVT VT = N->getValueType(0);
  if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
      VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
    return SDValue();
  if (N->getOperand(0).getOpcode() != ISD::AND ||
      N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
    return SDValue();

  SDValue And = N->getOperand(0);
  SDValue Srl = And.getOperand(0);

  // V1/V2/V3 are the splat constants of the mul, the and, and the shift.
  APInt V1, V2, V3;
  // NOTE(review): the third clause of this condition (the splat match of the
  // shift amount into V3) was lost in extraction; V3 is consumed below.
  if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
      !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
    return SDValue();

  unsigned HalfSize = VT.getScalarSizeInBits() / 2;
  if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
      V3 != (HalfSize - 1))
    return SDValue();

  // Reinterpret as a vector of half-width elements and test the sign bit.
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
                                EVT::getIntegerVT(*DAG.getContext(), HalfSize),
                                VT.getVectorElementCount() * 2);

  SDLoc DL(N);
  SDValue In = DAG.getNode(AArch64ISD::NVCAST, DL, HalfVT, Srl.getOperand(0));
  SDValue CM = DAG.getNode(AArch64ISD::CMLTz, DL, HalfVT, In);
  return DAG.getNode(AArch64ISD::NVCAST, DL, VT, CM);
}
15656 
// NOTE(review): the signature lines were lost in extraction (presumably
// static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget))
// — confirm upstream.
    const AArch64Subtarget *Subtarget) {

  // NOTE(review): the two 'if (SDValue Ext = ...)' guard lines for these
  // returns (the mul(dup(ext)) and mul-cmp-zero combines) were lost in
  // extraction.
    return Ext;
    return Ext;

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
  // and in MachineCombiner pass, add+mul will be combined into madd.
  // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue MulOper;
  unsigned AddSubOpc;

  // Matches V == add/sub(MulOper, 1) with a single use, filling in
  // MulOper/AddSubOpc as side effects.
  auto IsAddSubWith1 = [&](SDValue V) -> bool {
    AddSubOpc = V->getOpcode();
    if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
      SDValue Opnd = V->getOperand(1);
      MulOper = V->getOperand(0);
      if (AddSubOpc == ISD::SUB)
        std::swap(Opnd, MulOper);
      if (auto C = dyn_cast<ConstantSDNode>(Opnd))
        return C->isOne();
    }
    return false;
  };

  if (IsAddSubWith1(N0)) {
    SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
    return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
  }

  if (IsAddSubWith1(N1)) {
    SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
    return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
  }

  // The below optimizations require a constant RHS.
  if (!isa<ConstantSDNode>(N1))
    return SDValue();

  ConstantSDNode *C = cast<ConstantSDNode>(N1);
  const APInt &ConstValue = C->getAPIntValue();

  // Allow the scaling to be folded into the `cnt` instruction by preventing
  // the scaling to be obscured here. This makes it easier to pattern match.
  if (IsSVECntIntrinsic(N0) ||
      (N0->getOpcode() == ISD::TRUNCATE &&
       (IsSVECntIntrinsic(N0->getOperand(0)))))
    if (ConstValue.sge(1) && ConstValue.sle(16))
      return SDValue();

  // Multiplication of a power of two plus/minus one can be done more
  // cheaply as as shift+add/sub. For now, this is true unilaterally. If
  // future CPUs have a cheaper MADD instruction, this may need to be
  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
  // 64-bit is 5 cycles, so this is always a win.
  // More aggressively, some multiplications N0 * C can be lowered to
  // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
  // e.g. 6=3*2=(2+1)*2, 45=(1+4)*(1+8)
  // TODO: lower more cases.

  // TrailingZeroes is used to test if the mul can be lowered to
  // shift+add+shift.
  unsigned TrailingZeroes = ConstValue.countTrailingZeros();
  if (TrailingZeroes) {
    // Conservatively do not lower to shift+add+shift if the mul might be
    // folded into smul or umul.
    if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
                            isZeroExtended(N0.getNode(), DAG)))
      return SDValue();
    // Conservatively do not lower to shift+add+shift if the mul might be
    // folded into madd or msub.
    if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
                           N->use_begin()->getOpcode() == ISD::SUB))
      return SDValue();
  }
  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
  // and shift+add+shift.
  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
  unsigned ShiftAmt;

  // Small DAG-building helpers for the decompositions below.
  auto Shl = [&](SDValue N0, unsigned N1) {
    SDValue RHS = DAG.getConstant(N1, DL, MVT::i64);
    return DAG.getNode(ISD::SHL, DL, VT, N0, RHS);
  };
  auto Add = [&](SDValue N0, SDValue N1) {
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
  };
  auto Sub = [&](SDValue N0, SDValue N1) {
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
  };
  auto Negate = [&](SDValue N) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Zero, N);
  };

  // Can the const C be decomposed into (1+2^M1)*(1+2^N1), eg:
  // C = 45 is equal to (1+4)*(1+8), we don't decompose it into (1+2)*(16-1) as
  // the (2^N - 1) can't be execused via a single instruction.
  auto isPowPlusPlusConst = [](APInt C, APInt &M, APInt &N) {
    unsigned BitWidth = C.getBitWidth();
    for (unsigned i = 1; i < BitWidth / 2; i++) {
      APInt Rem;
      // NOTE(review): (1 << i) is a signed 'int' shift; when BitWidth is 64,
      // i can reach 31 where this overflows (UB). Upstream later widened this
      // shift — verify before reusing this code.
      APInt X(BitWidth, (1 << i) + 1);
      APInt::sdivrem(C, X, N, Rem);
      APInt NVMinus1 = N - 1;
      if (Rem == 0 && NVMinus1.isPowerOf2()) {
        M = X;
        return true;
      }
    }
    return false;
  };

  if (ConstValue.isNonNegative()) {
    // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, (2^(N-M) - 1) * 2^M) => (sub (shl x, N), (shl x, M))
    // (mul x, (2^M + 1) * (2^N + 1))
    //     => MV = (add (shl x, M), x); (add (shl MV, N), MV)
    APInt SCVMinus1 = ShiftedConstValue - 1;
    APInt SCVPlus1 = ShiftedConstValue + 1;
    APInt CVPlus1 = ConstValue + 1;
    APInt CVM, CVN;
    if (SCVMinus1.isPowerOf2()) {
      ShiftAmt = SCVMinus1.logBase2();
      return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
    } else if (CVPlus1.isPowerOf2()) {
      ShiftAmt = CVPlus1.logBase2();
      return Sub(Shl(N0, ShiftAmt), N0);
    } else if (SCVPlus1.isPowerOf2()) {
      ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
      return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
    } else if (Subtarget->hasLSLFast() &&
               isPowPlusPlusConst(ConstValue, CVM, CVN)) {
      APInt CVMMinus1 = CVM - 1;
      APInt CVNMinus1 = CVN - 1;
      unsigned ShiftM1 = CVMMinus1.logBase2();
      unsigned ShiftN1 = CVNMinus1.logBase2();
      // LSLFast implicate that Shifts <= 3 places are fast
      if (ShiftM1 <= 3 && ShiftN1 <= 3) {
        SDValue MVal = Add(Shl(N0, ShiftM1), N0);
        return Add(Shl(MVal, ShiftN1), MVal);
      }
    }
  } else {
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
    // (mul x, -(2^(N-M) - 1) * 2^M) => (sub (shl x, M), (shl x, N))
    APInt SCVPlus1 = -ShiftedConstValue + 1;
    APInt CVNegPlus1 = -ConstValue + 1;
    APInt CVNegMinus1 = -ConstValue - 1;
    if (CVNegPlus1.isPowerOf2()) {
      ShiftAmt = CVNegPlus1.logBase2();
      return Sub(N0, Shl(N0, ShiftAmt));
    } else if (CVNegMinus1.isPowerOf2()) {
      ShiftAmt = CVNegMinus1.logBase2();
      return Negate(Add(Shl(N0, ShiftAmt), N0));
    } else if (SCVPlus1.isPowerOf2()) {
      ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
      return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
    }
  }

  return SDValue();
}
15832 
15834  SelectionDAG &DAG) {
15835  // Take advantage of vector comparisons producing 0 or -1 in each lane to
15836  // optimize away operation when it's from a constant.
15837  //
15838  // The general transformation is:
15839  // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
15840  // AND(VECTOR_CMP(x,y), constant2)
15841  // constant2 = UNARYOP(constant)
15842 
15843  // Early exit if this isn't a vector operation, the operand of the
15844  // unary operation isn't a bitwise AND, or if the sizes of the operations
15845  // aren't the same.
15846  EVT VT = N->getValueType(0);
15847  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
15848  N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
15849  VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
15850  return SDValue();
15851 
15852  // Now check that the other operand of the AND is a constant. We could
15853  // make the transformation for non-constant splats as well, but it's unclear
15854  // that would be a benefit as it would not eliminate any operations, just
15855  // perform one more step in scalar code before moving to the vector unit.
15856  if (BuildVectorSDNode *BV =
15857  dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
15858  // Bail out if the vector isn't a constant.
15859  if (!BV->isConstant())
15860  return SDValue();
15861 
15862  // Everything checks out. Build up the new and improved node.
15863  SDLoc DL(N);
15864  EVT IntVT = BV->getValueType(0);
15865  // Create a new constant of the appropriate type for the transformed
15866  // DAG.
15867  SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
15868  // The AND node needs bitcasts to/from an integer vector type around it.
15869  SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
15870  SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
15871  N->getOperand(0)->getOperand(0), MaskConst);
15872  SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
15873  return Res;
15874  }
15875 
15876  return SDValue();
15877 }
15878 
15880  const AArch64Subtarget *Subtarget) {
15881  // First try to optimize away the conversion when it's conditionally from
15882  // a constant. Vectors only.
15884  return Res;
15885 
15886  EVT VT = N->getValueType(0);
15887  if (VT != MVT::f32 && VT != MVT::f64)
15888  return SDValue();
15889 
15890  // Only optimize when the source and destination types have the same width.
15891  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
15892  return SDValue();
15893 
15894  // If the result of an integer load is only used by an integer-to-float
15895  // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
15896  // This eliminates an "integer-to-vector-move" UOP and improves throughput.
15897  SDValue N0 = N->getOperand(0);
15898  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15899  // Do not change the width of a volatile load.
15900  !cast<LoadSDNode>(N0)->isVolatile()) {
15901  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15902  SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15903  LN0->getPointerInfo(), LN0->getAlign(),
15904  LN0->getMemOperand()->getFlags());
15905 
15906  // Make sure successors of the original load stay after it by updating them
15907  // to use the new Chain.
15908  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
15909 
15910  unsigned Opcode =
15911  (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
15912  return DAG.getNode(Opcode, SDLoc(N), VT, Load);
15913  }
15914 
15915  return SDValue();
15916 }
15917 
15918 /// Fold a floating-point multiply by power of two into floating-point to
15919 /// fixed-point conversion.
15922  const AArch64Subtarget *Subtarget) {
  // This combine forms NEON fixed-point convert intrinsics, so it requires
  // NEON and must not fire for streaming-compatible SVE functions.
15923  if (!Subtarget->hasNEON() || Subtarget->forceStreamingCompatibleSVE())
15924  return SDValue();
15925 
15926  if (!N->getValueType(0).isSimple())
15927  return SDValue();
15928 
  // The conversion's source must be an FMUL; its RHS is checked below for a
  // power-of-two constant splat.
15929  SDValue Op = N->getOperand(0);
15930  if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
15931  return SDValue();
15932 
  // Only 64-bit and 128-bit NEON vectors are handled.
15933  if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
15934  return SDValue();
15935 
15936  SDValue ConstVec = Op->getOperand(1);
15937  if (!isa<BuildVectorSDNode>(ConstVec))
15938  return SDValue();
15939 
  // Supported float element widths: f32, f64, and f16 only with +fullfp16.
15940  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
15941  uint32_t FloatBits = FloatTy.getSizeInBits();
15942  if (FloatBits != 32 && FloatBits != 64 &&
15943  (FloatBits != 16 || !Subtarget->hasFullFP16()))
15944  return SDValue();
15945 
15946  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
15947  uint32_t IntBits = IntTy.getSizeInBits();
15948  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
15949  return SDValue();
15950 
15951  // Avoid conversions where iN is larger than the float (e.g., float -> i64).
15952  if (IntBits > FloatBits)
15953  return SDValue();
15954 
  // The multiplier must be a splat of an exact power of two 2^C with
  // 1 <= C <= Bits; C becomes the intrinsic's fractional-bits operand.
15955  BitVector UndefElements;
15956  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
15957  int32_t Bits = IntBits == 64 ? 64 : 32;
15958  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
15959  if (C == -1 || C == 0 || C > Bits)
15960  return SDValue();
15961 
15962  EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
15963  if (!DAG.getTargetLoweringInfo().isTypeLegal(ResTy))
15964  return SDValue();
15965 
  // For the saturating converts, only handle the case where the saturation
  // width matches both the source and destination widths, so the intrinsic's
  // behaviour matches the node's semantics.
15966  if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
15967  N->getOpcode() == ISD::FP_TO_UINT_SAT) {
15968  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15969  if (SatVT.getScalarSizeInBits() != IntBits || IntBits != FloatBits)
15970  return SDValue();
15971  }
15972 
15973  SDLoc DL(N);
15974  bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
15975  N->getOpcode() == ISD::FP_TO_SINT_SAT);
15976  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
15977  : Intrinsic::aarch64_neon_vcvtfp2fxu;
15978  SDValue FixConv =
15979  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
15980  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
15981  Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
15982  // We can handle smaller integers by generating an extra trunc.
15983  if (IntBits < FloatBits)
15984  FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
15985 
15986  return FixConv;
15987 }
15988 
15989 /// Fold a floating-point divide by power of two into fixed-point to
15990 /// floating-point conversion.
15993  const AArch64Subtarget *Subtarget) {
15994  if (!Subtarget->hasNEON())
15995  return SDValue();
15996 
  // N is an FDIV whose numerator is a {s,u}int_to_fp and whose denominator is
  // a constant splat of 2^C; it can become a NEON fixed-point convert
  // intrinsic with C fractional bits.
15997  SDValue Op = N->getOperand(0);
15998  unsigned Opc = Op->getOpcode();
15999  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16000  !Op.getOperand(0).getValueType().isSimple() ||
16001  (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
16002  return SDValue();
16003 
16004  SDValue ConstVec = N->getOperand(1);
16005  if (!isa<BuildVectorSDNode>(ConstVec))
16006  return SDValue();
16007 
16008  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
16009  int32_t IntBits = IntTy.getSizeInBits();
16010  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
16011  return SDValue();
16012 
16013  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
16014  int32_t FloatBits = FloatTy.getSizeInBits();
16015  if (FloatBits != 32 && FloatBits != 64)
16016  return SDValue();
16017 
16018  // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
16019  if (IntBits > FloatBits)
16020  return SDValue();
16021 
  // The divisor must be a splat of an exact power of two 2^C with
  // 1 <= C <= FloatBits.
16022  BitVector UndefElements;
16023  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16024  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
16025  if (C == -1 || C == 0 || C > FloatBits)
16026  return SDValue();
16027 
16028  MVT ResTy;
16029  unsigned NumLanes = Op.getValueType().getVectorNumElements();
16030  switch (NumLanes) {
16031  default:
16032  return SDValue();
16033  case 2:
16034  ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
16035  break;
16036  case 4:
16037  ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
16038  break;
16039  }
16040 
  // NOTE(review): v4i64 is rejected before op legalization — presumably
  // because it is not a single-register NEON type; confirm against history.
16041  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
16042  return SDValue();
16043 
16044  SDLoc DL(N);
16045  SDValue ConvInput = Op.getOperand(0);
16046  bool IsSigned = Opc == ISD::SINT_TO_FP;
  // Extend narrower integer sources so the intrinsic input matches ResTy.
16047  if (IntBits < FloatBits)
16048  ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
16049  ResTy, ConvInput);
16050 
16051  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
16052  : Intrinsic::aarch64_neon_vcvtfxu2fp;
16053  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
16054  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
16055  DAG.getConstant(C, DL, MVT::i32));
16056 }
16057 
16058 /// An EXTR instruction is made up of two shifts, ORed together. This helper
16059 /// searches for and classifies those shifts.
16060 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
16061  bool &FromHi) {
16062  if (N.getOpcode() == ISD::SHL)
16063  FromHi = false;
16064  else if (N.getOpcode() == ISD::SRL)
16065  FromHi = true;
16066  else
16067  return false;
16068 
16069  if (!isa<ConstantSDNode>(N.getOperand(1)))
16070  return false;
16071 
16072  ShiftAmount = N->getConstantOperandVal(1);
16073  Src = N->getOperand(0);
16074  return true;
16075 }
16076 
16077 /// EXTR instruction extracts a contiguous chunk of bits from two existing
16078 /// registers viewed as a high/low pair. This function looks for the pattern:
16079 /// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
16080 /// with an EXTR. Can't quite be done in TableGen because the two immediates
16081 /// aren't independent.
16084  SelectionDAG &DAG = DCI.DAG;
16085  SDLoc DL(N);
16086  EVT VT = N->getValueType(0);
16087 
16088  assert(N->getOpcode() == ISD::OR && "Unexpected root");
16089 
  // EXTR only operates on 32- and 64-bit GPRs.
16090  if (VT != MVT::i32 && VT != MVT::i64)
16091  return SDValue();
16092 
  // Classify each OR operand as the SHL (low) or SRL (high) half.
16093  SDValue LHS;
16094  uint32_t ShiftLHS = 0;
16095  bool LHSFromHi = false;
16096  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
16097  return SDValue();
16098 
16099  SDValue RHS;
16100  uint32_t ShiftRHS = 0;
16101  bool RHSFromHi = false;
16102  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
16103  return SDValue();
16104 
16105  // If they're both trying to come from the high part of the register, they're
16106  // not really an EXTR.
16107  if (LHSFromHi == RHSFromHi)
16108  return SDValue();
16109 
  // The two shift amounts must exactly cover the register width for the OR to
  // assemble one contiguous chunk of bits.
16110  if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
16111  return SDValue();
16112 
  // Canonicalise so LHS holds the left-shifted value and ShiftRHS is the
  // right-shift amount, which becomes EXTR's immediate.
16113  if (LHSFromHi) {
16114  std::swap(LHS, RHS);
16115  std::swap(ShiftLHS, ShiftRHS);
16116  }
16117 
16118  return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
16119  DAG.getConstant(ShiftRHS, DL, MVT::i64));
16120 }
16121 
16123  const AArch64TargetLowering &TLI) {
16124  EVT VT = N->getValueType(0);
16125  SelectionDAG &DAG = DCI.DAG;
16126  SDLoc DL(N);
16127 
16128  if (!VT.isVector())
16129  return SDValue();
16130 
16131  // The combining code currently only works for NEON vectors. In particular,
16132  // it does not work for SVE when dealing with vectors wider than 128 bits.
16133  // It also doesn't work for streaming mode because it causes generating
16134  // bsl instructions that are invalid in streaming mode.
16136  VT,
16138  return SDValue();
16139 
16140  SDValue N0 = N->getOperand(0);
16141  if (N0.getOpcode() != ISD::AND)
16142  return SDValue();
16143 
16144  SDValue N1 = N->getOperand(1);
16145  if (N1.getOpcode() != ISD::AND)
16146  return SDValue();
16147 
16148  // InstCombine does (not (neg a)) => (add a -1).
16149  // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
16150  // Loop over all combinations of AND operands.
16151  for (int i = 1; i >= 0; --i) {
16152  for (int j = 1; j >= 0; --j) {
16153  SDValue O0 = N0->getOperand(i);
16154  SDValue O1 = N1->getOperand(j);
16155  SDValue Sub, Add, SubSibling, AddSibling;
16156 
16157  // Find a SUB and an ADD operand, one from each AND.
16158  if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
16159  Sub = O0;
16160  Add = O1;
16161  SubSibling = N0->getOperand(1 - i);
16162  AddSibling = N1->getOperand(1 - j);
16163  } else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
16164  Add = O0;
16165  Sub = O1;
16166  AddSibling = N0->getOperand(1 - i);
16167  SubSibling = N1->getOperand(1 - j);
16168  } else
16169  continue;
16170 
16172  continue;
16173 
16174  // Constant ones is always righthand operand of the Add.
16175  if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
16176  continue;
16177 
16178  if (Sub.getOperand(1) != Add.getOperand(0))
16179  continue;
16180 
16181  return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
16182  }
16183  }
16184 
16185  // (or (and a b) (and (not a) c)) => (bsl a b c)
16186  // We only have to look for constant vectors here since the general, variable
16187  // case can be handled in TableGen.
16188  unsigned Bits = VT.getScalarSizeInBits();
16189  uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
16190  for (int i = 1; i >= 0; --i)
16191  for (int j = 1; j >= 0; --j) {
16192  BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
16193  BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
16194  if (!BVN0 || !BVN1)
16195  continue;
16196 
16197  bool FoundMatch = true;
16198  for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
16199  ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
16200  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
16201  if (!CN0 || !CN1 ||
16202  CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
16203  FoundMatch = false;
16204  break;
16205  }
16206  }
16207 
16208  if (FoundMatch)
16209  return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
16210  N0->getOperand(1 - i), N1->getOperand(1 - j));
16211  }
16212 
16213  return SDValue();
16214 }
16215 
16216 // Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
16217 // convert to csel(ccmp(.., cc0)), depending on cc1:
16218 
16219 // (AND (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
16220 // =>
16221 // (CSET cc1 (CCMP x1 y1 !cc1 cc0 cmp0))
16222 //
16223 // (OR (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
16224 // =>
16225 // (CSET cc1 (CCMP x1 y1 cc1 !cc0 cmp0))
16227  EVT VT = N->getValueType(0);
16228  SDValue CSel0 = N->getOperand(0);
16229  SDValue CSel1 = N->getOperand(1);
16230 
16231  if (CSel0.getOpcode() != AArch64ISD::CSEL ||
16232  CSel1.getOpcode() != AArch64ISD::CSEL)
16233  return SDValue();
16234 
16235  if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
16236  return SDValue();
16237 
16238  if (!isNullConstant(CSel0.getOperand(0)) ||
16239  !isOneConstant(CSel0.getOperand(1)) ||
16240  !isNullConstant(CSel1.getOperand(0)) ||
16241  !isOneConstant(CSel1.getOperand(1)))
16242  return SDValue();
16243 
16244  SDValue Cmp0 = CSel0.getOperand(3);
16245  SDValue Cmp1 = CSel1.getOperand(3);
16248  if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
16249  return SDValue();
16250  if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
16251  Cmp0.getOpcode() == AArch64ISD::SUBS) {
16252  std::swap(Cmp0, Cmp1);
16253  std::swap(CC0, CC1);
16254  }
16255 
16256  if (Cmp1.getOpcode() != AArch64ISD::SUBS)
16257  return SDValue();
16258 
16259  SDLoc DL(N);
16260  SDValue CCmp, Condition;
16261  unsigned NZCV;
16262 
16263  if (N->getOpcode() == ISD::AND) {
16265  Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
16267  } else {
16269  Condition = DAG.getConstant(CC0, DL, MVT_CC);
16270  NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1);
16271  }
16272 
16273  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
16274 
16275  auto *Op1 = dyn_cast<ConstantSDNode>(Cmp1.getOperand(1));
16276  if (Op1 && Op1->getAPIntValue().isNegative() &&
16277  Op1->getAPIntValue().sgt(-32)) {
16278  // CCMP accept the constant int the range [0, 31]
16279  // if the Op1 is a constant in the range [-31, -1], we
16280  // can select to CCMN to avoid the extra mov
16281  SDValue AbsOp1 =
16282  DAG.getConstant(Op1->getAPIntValue().abs(), DL, Op1->getValueType(0));
16283  CCmp = DAG.getNode(AArch64ISD::CCMN, DL, MVT_CC, Cmp1.getOperand(0), AbsOp1,
16284  NZCVOp, Condition, Cmp0);
16285  } else {
16286  CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
16287  Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
16288  }
16289  return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
16290  CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
16291  CCmp);
16292 }
16293 
16295  const AArch64Subtarget *Subtarget,
16296  const AArch64TargetLowering &TLI) {
16297  SelectionDAG &DAG = DCI.DAG;
16298  EVT VT = N->getValueType(0);
16299 
16300  if (SDValue R = performANDORCSELCombine(N, DAG))
16301  return R;
16302 
16303  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
16304  return SDValue();
16305 
16306  // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
16307  if (SDValue Res = tryCombineToEXTR(N, DCI))
16308  return Res;
16309 
16310  if (SDValue Res = tryCombineToBSL(N, DCI, TLI))
16311  return Res;
16312 
16313  return SDValue();
16314 }
16315 
16317  if (!MemVT.getVectorElementType().isSimple())
16318  return false;
16319 
16320  uint64_t MaskForTy = 0ull;
16321  switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
16322  case MVT::i8:
16323  MaskForTy = 0xffull;
16324  break;
16325  case MVT::i16:
16326  MaskForTy = 0xffffull;
16327  break;
16328  case MVT::i32:
16329  MaskForTy = 0xffffffffull;
16330  break;
16331  default:
16332  return false;
16333  break;
16334  }
16335 
16336  if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
16337  if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
16338  return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
16339 
16340  return false;
16341 }
16342 
16344  // Look through cast.
16345  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
16346  N = N.getOperand(0);
16347 
16348  return ISD::isConstantSplatVectorAllZeros(N.getNode());
16349 }
16350 
16352  unsigned NumElts = N.getValueType().getVectorMinNumElements();
16353 
16354  // Look through cast.
16355  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
16356  N = N.getOperand(0);
16357  // When reinterpreting from a type with fewer elements the "new" elements
16358  // are not active, so bail if they're likely to be used.
16359  if (N.getValueType().getVectorMinNumElements() < NumElts)
16360  return false;
16361  }
16362 
16363  if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
16364  return true;
16365 
16366  // "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
16367  // or smaller than the implicit element type represented by N.
16368  // NOTE: A larger element count implies a smaller element type.
16369  if (N.getOpcode() == AArch64ISD::PTRUE &&
16370  N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
16371  return N.getValueType().getVectorMinNumElements() >= NumElts;
16372 
16373  // If we're compiling for a specific vector-length, we can check if the
16374  // pattern's VL equals that of the scalable vector at runtime.
16375  if (N.getOpcode() == AArch64ISD::PTRUE) {
16376  const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
16377  unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
16378  unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
16379  if (MaxSVESize && MinSVESize == MaxSVESize) {
16380  unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
16381  unsigned PatNumElts =
16382  getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
16383  return PatNumElts == (NumElts * VScale);
16384  }
16385  }
16386 
16387  return false;
16388 }
16389 
16391  SDValue LeafOp = SDValue(N, 0);
16392  SDValue Op = N->getOperand(0);
16393  while (Op.getOpcode() == AArch64ISD::REINTERPRET_CAST &&
16394  LeafOp.getValueType() != Op.getValueType())
16395  Op = Op->getOperand(0);
16396  if (LeafOp.getValueType() == Op.getValueType())
16397  return Op;
16398  return SDValue();
16399 }
16400 
16403  if (DCI.isBeforeLegalizeOps())
16404  return SDValue();
16405 
16406  SelectionDAG &DAG = DCI.DAG;
16407  SDValue Src = N->getOperand(0);
16408  unsigned Opc = Src->getOpcode();
16409 
16410  // Zero/any extend of an unsigned unpack
16411  if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
16412  SDValue UnpkOp = Src->getOperand(0);
16413  SDValue Dup = N->getOperand(1);
16414 
16415  if (Dup.getOpcode() != ISD::SPLAT_VECTOR)
16416  return SDValue();
16417 
16418  SDLoc DL(N);
16419  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
16420  if (!C)
16421  return SDValue();
16422 
16423  uint64_t ExtVal = C->getZExtValue();
16424 
16425  // If the mask is fully covered by the unpack, we don't need to push
16426  // a new AND onto the operand
16427  EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
16428  if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
16429  (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
16430  (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
16431  return Src;
16432 
16433  // Truncate to prevent a DUP with an over wide constant
16434  APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
16435 
16436  // Otherwise, make sure we propagate the AND to the operand
16437  // of the unpack
16438  Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
16439  DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
16440 
16441  SDValue And = DAG.getNode(ISD::AND, DL,
16442  UnpkOp->getValueType(0), UnpkOp, Dup);
16443 
16444  return DAG.getNode(Opc, DL, N->getValueType(0), And);
16445  }
16446 
16447  // If both sides of AND operations are i1 splat_vectors then
16448  // we can produce just i1 splat_vector as the result.
16449  if (isAllActivePredicate(DAG, N->getOperand(0)))
16450  return N->getOperand(1);
16451  if (isAllActivePredicate(DAG, N->getOperand(1)))
16452  return N->getOperand(0);
16453 
16455  return SDValue();
16456 
16457  SDValue Mask = N->getOperand(1);
16458 
16459  if (!Src.hasOneUse())
16460  return SDValue();
16461 
16462  EVT MemVT;
16463 
16464  // SVE load instructions perform an implicit zero-extend, which makes them
16465  // perfect candidates for combining.
16466  switch (Opc) {
16470  MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
16471  break;
16487  MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
16488  break;
16489  default:
16490  return SDValue();
16491  }
16492 
16493  if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
16494  return Src;
16495 
16496  return SDValue();
16497 }
16498 
16501  SelectionDAG &DAG = DCI.DAG;
16502  SDValue LHS = N->getOperand(0);
16503  SDValue RHS = N->getOperand(1);
16504  EVT VT = N->getValueType(0);
16505 
  // AND of two 0/1 CSELs may fold to a CSEL + CCMP chain.
16506  if (SDValue R = performANDORCSELCombine(N, DAG))
16507  return R;
16508 
16509  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
16510  return SDValue();
16511 
  // Scalable types take the SVE-specific path.
16512  if (VT.isScalableVector())
16513  return performSVEAndCombine(N, DCI);
16514 
16515  // The combining code below works only for NEON vectors. In particular, it
16516  // does not work for SVE when dealing with vectors wider than 128 bits.
16517  if (!VT.is64BitVector() && !VT.is128BitVector())
16518  return SDValue();
16519 
16520  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
16521  if (!BVN)
16522  return SDValue();
16523 
16524  // AND does not accept an immediate, so check if we can use a BIC immediate
16525  // instruction instead. We do this here instead of using a (and x, (mvni imm))
16526  // pattern in isel, because some immediates may be lowered to the preferred
16527  // (and x, (movi imm)) form, even though an mvni representation also exists.
16528  APInt DefBits(VT.getSizeInBits(), 0);
16529  APInt UndefBits(VT.getSizeInBits(), 0);
16530  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
16531  SDValue NewOp;
16532 
  // (and x, C) == (bic x, ~C): invert the constant and see if it fits a
  // 32- or 16-bit AdvSIMD modified immediate.
16533  DefBits = ~DefBits;
16534  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
16535  DefBits, &LHS)) ||
16536  (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
16537  DefBits, &LHS)))
16538  return NewOp;
16539 
  // Retry with undef lanes treated as whatever value makes the inverted
  // immediate encodable.
16540  UndefBits = ~UndefBits;
16541  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
16542  UndefBits, &LHS)) ||
16543  (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
16544  UndefBits, &LHS)))
16545  return NewOp;
16546  }
16547 
16548  return SDValue();
16549 }
16550 
16551 static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
16552  switch (Opcode) {
16553  case ISD::STRICT_FADD:
16554  case ISD::FADD:
16555  return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
16556  case ISD::ADD:
16557  return VT == MVT::i64;
16558  default:
16559  return false;
16560  }
16561 }
16562 
16563 static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
16565 
16567  if ((N.getOpcode() == ISD::SETCC) ||
16568  (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16569  (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
16570  N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
16571  N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
16572  N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
16573  N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
16574  N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
16575  N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
16576  N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
16577  // get_active_lane_mask is lowered to a whilelo instruction.
16578  N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
16579  return true;
16580 
16581  return false;
16582 }
16583 
16584 // Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
16585 // ... into: "ptrue p, all" + PTEST
16586 static SDValue
16589  const AArch64Subtarget *Subtarget) {
16590  assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
16591  // Make sure PTEST can be legalised with illegal types.
16592  if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
16593  return SDValue();
16594 
16595  SDValue N0 = N->getOperand(0);
16596  EVT VT = N0.getValueType();
16597 
16598  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1 ||
16599  !isNullConstant(N->getOperand(1)))
16600  return SDValue();
16601 
16602  // Restricted the DAG combine to only cases where we're extracting from a
16603  // flag-setting operation.
16604  if (!isPredicateCCSettingOp(N0))
16605  return SDValue();
16606 
16607  // Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0
16608  SelectionDAG &DAG = DCI.DAG;
16610  return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE);
16611 }
16612 
16613 // Materialize : Idx = (add (mul vscale, NumEls), -1)
16614 // i1 = extract_vector_elt t37, Constant:i64<Idx>
16615 // ... into: "ptrue p, all" + PTEST
16616 static SDValue
16619  const AArch64Subtarget *Subtarget) {
16620  assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
16621  // Make sure PTEST can be legalised with illegal types.
16622  if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
16623  return SDValue();
16624 
16625  SDValue N0 = N->getOperand(0);
16626  EVT OpVT = N0.getValueType();
16627 
  // Only extracts from a scalable i1 vector (an SVE predicate) qualify.
16628  if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
16629  return SDValue();
16630 
16631  // Idx == (add (mul vscale, NumEls), -1)
16632  SDValue Idx = N->getOperand(1);
16633  if (Idx.getOpcode() != ISD::ADD || !isAllOnesConstant(Idx.getOperand(1)))
16634  return SDValue();
16635 
16636  SDValue VS = Idx.getOperand(0);
16637  if (VS.getOpcode() != ISD::VSCALE)
16638  return SDValue();
16639 
  // The VSCALE multiplier must match the predicate's minimum element count,
  // so the index is exactly the runtime index of the last element.
16640  unsigned NumEls = OpVT.getVectorElementCount().getKnownMinValue();
16641  if (VS.getConstantOperandVal(0) != NumEls)
16642  return SDValue();
16643 
16644  // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
16645  SelectionDAG &DAG = DCI.DAG;
16646  SDValue Pg = getPTrue(DAG, SDLoc(N), OpVT, AArch64SVEPredPattern::all);
16647  return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE);
16648 }
16649 
16650 static SDValue
16652  const AArch64Subtarget *Subtarget) {
16653  assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
16654  if (SDValue Res = performFirstTrueTestVectorCombine(N, DCI, Subtarget))
16655  return Res;
16656  if (SDValue Res = performLastTrueTestVectorCombine(N, DCI, Subtarget))
16657  return Res;
16658 
16659  SelectionDAG &DAG = DCI.DAG;
16660  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
16661  ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
16662 
16663  EVT VT = N->getValueType(0);
16664  const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
16665  bool IsStrict = N0->isStrictFPOpcode();
16666 
16667  // extract(dup x) -> x
16668  if (N0.getOpcode() == AArch64ISD::DUP)
16669  return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
16670 
16671  // Rewrite for pairwise fadd pattern
16672  // (f32 (extract_vector_elt
16673  // (fadd (vXf32 Other)
16674  // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
16675  // ->
16676  // (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
16677  // (extract_vector_elt (vXf32 Other) 1))
16678  // For strict_fadd we need to make sure the old strict_fadd can be deleted, so
16679  // we can only do this when it's used only by the extract_vector_elt.
16680  if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
16681  hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
16682  (!IsStrict || N0.hasOneUse())) {
16683  SDLoc DL(N0);
16684  SDValue N00 = N0->getOperand(IsStrict ? 1 : 0);
16685  SDValue N01 = N0->getOperand(IsStrict ? 2 : 1);
16686 
16687  ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
16688  SDValue Other = N00;
16689 
16690  // And handle the commutative case.
16691  if (!Shuffle) {
16692  Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
16693  Other = N01;
16694  }
16695 
16696  if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
16697  Other == Shuffle->getOperand(0)) {
16698  SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
16699  DAG.getConstant(0, DL, MVT::i64));
16700  SDValue Extract2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
16701  DAG.getConstant(1, DL, MVT::i64));
16702  if (!IsStrict)
16703  return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2);
16704 
16705  // For strict_fadd we need uses of the final extract_vector to be replaced
16706  // with the strict_fadd, but we also need uses of the chain output of the
16707  // original strict_fadd to use the chain output of the new strict_fadd as
16708  // otherwise it may not be deleted.
16709  SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
16710  {VT, MVT::Other},
16711  {N0->getOperand(0), Extract1, Extract2});
16713  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Ret.getValue(1));
16714  return SDValue(N, 0);
16715  }
16716  }
16717 
16718  return SDValue();
16719 }
16720 
16723  SelectionDAG &DAG) {
16724  SDLoc dl(N);
16725  EVT VT = N->getValueType(0);
16726  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
16727  unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
16728 
16729  if (VT.isScalableVector())
16730  return SDValue();
16731 
16732  // Optimize concat_vectors of truncated vectors, where the intermediate
16733  // type is illegal, to avoid said illegality, e.g.,
16734  // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
16735  // (v2i16 (truncate (v2i64)))))
16736  // ->
16737  // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
16738  // (v4i32 (bitcast (v2i64))),
16739  // <0, 2, 4, 6>)))
16740  // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
16741  // on both input and result type, so we might generate worse code.
16742  // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
16743  if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
16744  N1Opc == ISD::TRUNCATE) {
16745  SDValue N00 = N0->getOperand(0);
16746  SDValue N10 = N1->getOperand(0);
16747  EVT N00VT = N00.getValueType();
16748 
16749  if (N00VT == N10.getValueType() &&
16750  (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
16751  N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
16752  MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
16754  for (size_t i = 0; i < Mask.size(); ++i)
16755  Mask[i] = i * 2;
16756  return DAG.getNode(ISD::TRUNCATE, dl, VT,
16757  DAG.getVectorShuffle(
16758  MidVT, dl,
16759  DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
16760  DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
16761  }
16762  }
16763 
16764  if (N->getOperand(0).getValueType() == MVT::v4i8) {
16765  // If we have a concat of v4i8 loads, convert them to a buildvector of f32
16766  // loads to prevent having to go through the v4i8 load legalization that
16767  // needs to extend each element into a larger type.
16768  if (N->getNumOperands() % 2 == 0 && all_of(N->op_values(), [](SDValue V) {
16769  if (V.getValueType() != MVT::v4i8)
16770  return false;
16771  if (V.isUndef())
16772  return true;
16773  LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
16774  return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
16775  LD->getExtensionType() == ISD::NON_EXTLOAD;
16776  })) {
16777  EVT NVT =
16778  EVT::getVectorVT(*DAG.getContext(), MVT::f32, N->getNumOperands());
16780 
16781  for (unsigned i = 0; i < N->getNumOperands(); i++) {
16782  SDValue V = N->getOperand(i);
16783  if (V.isUndef())
16784  Ops.push_back(DAG.getUNDEF(MVT::f32));
16785  else {
16786  LoadSDNode *LD = cast<LoadSDNode>(V);
16787  SDValue NewLoad =
16788  DAG.getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(),
16789  LD->getMemOperand());
16790  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
16791  Ops.push_back(NewLoad);
16792  }
16793  }
16794  return DAG.getBitcast(N->getValueType(0),
16795  DAG.getBuildVector(NVT, dl, Ops));
16796  }
16797  }
16798 
16799  // Canonicalise concat_vectors to replace concatenations of truncated nots
16800  // with nots of concatenated truncates. This in some cases allows for multiple
16801  // redundant negations to be eliminated.
16802  // (concat_vectors (v4i16 (truncate (not (v4i32)))),
16803  // (v4i16 (truncate (not (v4i32)))))
16804  // ->
16805  // (not (concat_vectors (v4i16 (truncate (v4i32))),
16806  // (v4i16 (truncate (v4i32)))))
16807  if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
16808  N1Opc == ISD::TRUNCATE && N->isOnlyUserOf(N0.getNode()) &&
16809  N->isOnlyUserOf(N1.getNode())) {
16810  auto isBitwiseVectorNegate = [](SDValue V) {
16811  return V->getOpcode() == ISD::XOR &&
16813  };
16814  SDValue N00 = N0->getOperand(0);
16815  SDValue N10 = N1->getOperand(0);
16816  if (isBitwiseVectorNegate(N00) && N0->isOnlyUserOf(N00.getNode()) &&
16817  isBitwiseVectorNegate(N10) && N1->isOnlyUserOf(N10.getNode())) {
16818  return DAG.getNOT(
16819  dl,
16820  DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
16821  DAG.getNode(ISD::TRUNCATE, dl, N0.getValueType(),
16822  N00->getOperand(0)),
16823  DAG.getNode(ISD::TRUNCATE, dl, N1.getValueType(),
16824  N10->getOperand(0))),
16825  VT);
16826  }
16827  }
16828 
16829  // Wait till after everything is legalized to try this. That way we have
16830  // legal vector types and such.
16831  if (DCI.isBeforeLegalizeOps())
16832  return SDValue();
16833 
16834  // Optimise concat_vectors of two [us]avgceils or [us]avgfloors that use
16835  // extracted subvectors from the same original vectors. Combine these into a
16836  // single avg that operates on the two original vectors.
16837  // avgceil is the target independant name for rhadd, avgfloor is a hadd.
16838  // Example:
16839  // (concat_vectors (v8i8 (avgceils (extract_subvector (v16i8 OpA, <0>),
16840  // extract_subvector (v16i8 OpB, <0>))),
16841  // (v8i8 (avgceils (extract_subvector (v16i8 OpA, <8>),
16842  // extract_subvector (v16i8 OpB, <8>)))))
16843  // ->
16844  // (v16i8(avgceils(v16i8 OpA, v16i8 OpB)))
16845  if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
16846  (N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS ||
16847  N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS)) {
16848  SDValue N00 = N0->getOperand(0);
16849  SDValue N01 = N0->getOperand(1);
16850  SDValue N10 = N1->getOperand(0);
16851  SDValue N11 = N1->getOperand(1);
16852 
16853  EVT N00VT = N00.getValueType();
16854  EVT N10VT = N10.getValueType();
16855 
16856  if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16857  N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16858  N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16859  N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
16860  SDValue N00Source = N00->getOperand(0);
16861  SDValue N01Source = N01->getOperand(0);
16862  SDValue N10Source = N10->getOperand(0);
16863  SDValue N11Source = N11->getOperand(0);
16864 
16865  if (N00Source == N10Source && N01Source == N11Source &&
16866  N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
16867  assert(N0.getValueType() == N1.getValueType());
16868 
16869  uint64_t N00Index = N00.getConstantOperandVal(1);
16870  uint64_t N01Index = N01.getConstantOperandVal(1);
16871  uint64_t N10Index = N10.getConstantOperandVal(1);
16872  uint64_t N11Index = N11.getConstantOperandVal(1);
16873 
16874  if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
16875  N10Index == N00VT.getVectorNumElements())
16876  return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
16877  }
16878  }
16879  }
16880 
16881  // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
16882  // splat. The indexed instructions are going to be expecting a DUPLANE64, so
16883  // canonicalise to that.
16884  if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
16885  assert(VT.getScalarSizeInBits() == 64);
16886  return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
16887  DAG.getConstant(0, dl, MVT::i64));
16888  }
16889 
16890  // Canonicalise concat_vectors so that the right-hand vector has as few
16891  // bit-casts as possible before its real operation. The primary matching
16892  // destination for these operations will be the narrowing "2" instructions,
16893  // which depend on the operation being performed on this right-hand vector.
16894  // For example,
16895  // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
16896  // becomes
16897  // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
16898 
16899  if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
16900  return SDValue();
16901  SDValue RHS = N1->getOperand(0);
16902  MVT RHSTy = RHS.getValueType().getSimpleVT();
16903  // If the RHS is not a vector, this is not the pattern we're looking for.
16904  if (!RHSTy.isVector())
16905  return SDValue();
16906 
16907  LLVM_DEBUG(
16908  dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
16909 
16910  MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
16911  RHSTy.getVectorNumElements() * 2);
16912  return DAG.getNode(ISD::BITCAST, dl, VT,
16913  DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
16914  DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
16915  RHS));
16916 }
16917 
16918 static SDValue
16920  SelectionDAG &DAG) {
16921  if (DCI.isBeforeLegalizeOps())
16922  return SDValue();
16923 
16924  EVT VT = N->getValueType(0);
16925  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
16926  return SDValue();
16927 
16928  SDValue V = N->getOperand(0);
16929 
16930  // NOTE: This combine exists in DAGCombiner, but that version's legality check
16931  // blocks this combine because the non-const case requires custom lowering.
16932  //
16933  // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const)
16934  if (V.getOpcode() == ISD::SPLAT_VECTOR)
16935  if (isa<ConstantSDNode>(V.getOperand(0)))
16936  return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0));
16937 
16938  return SDValue();
16939 }
16940 
16941 static SDValue
16943  SelectionDAG &DAG) {
16944  SDLoc DL(N);
16945  SDValue Vec = N->getOperand(0);
16946  SDValue SubVec = N->getOperand(1);
16947  uint64_t IdxVal = N->getConstantOperandVal(2);
16948  EVT VecVT = Vec.getValueType();
16949  EVT SubVT = SubVec.getValueType();
16950 
16951  // Only do this for legal fixed vector types.
16952  if (!VecVT.isFixedLengthVector() ||
16953  !DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
16954  !DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
16955  return SDValue();
16956 
16957  // Ignore widening patterns.
16958  if (IdxVal == 0 && Vec.isUndef())
16959  return SDValue();
16960 
16961  // Subvector must be half the width and an "aligned" insertion.
16962  unsigned NumSubElts = SubVT.getVectorNumElements();
16963  if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
16964  (IdxVal != 0 && IdxVal != NumSubElts))
16965  return SDValue();
16966 
16967  // Fold insert_subvector -> concat_vectors
16968  // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
16969  // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
16970  SDValue Lo, Hi;
16971  if (IdxVal == 0) {
16972  Lo = SubVec;
16973  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
16974  DAG.getVectorIdxConstant(NumSubElts, DL));
16975  } else {
16976  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
16977  DAG.getVectorIdxConstant(0, DL));
16978  Hi = SubVec;
16979  }
16980  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
16981 }
16982 
16985  SelectionDAG &DAG) {
16986  // Wait until after everything is legalized to try this. That way we have
16987  // legal vector types and such.
16988  if (DCI.isBeforeLegalizeOps())
16989  return SDValue();
16990  // Transform a scalar conversion of a value from a lane extract into a
16991  // lane extract of a vector conversion. E.g., from foo1 to foo2:
16992  // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
16993  // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
16994  //
16995  // The second form interacts better with instruction selection and the
16996  // register allocator to avoid cross-class register copies that aren't
16997  // coalescable due to a lane reference.
16998 
16999  // Check the operand and see if it originates from a lane extract.
17000  SDValue Op1 = N->getOperand(1);
17001  if (Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
17002  return SDValue();
17003 
17004  // Yep, no additional predication needed. Perform the transform.
17005  SDValue IID = N->getOperand(0);
17006  SDValue Shift = N->getOperand(2);
17007  SDValue Vec = Op1.getOperand(0);
17008  SDValue Lane = Op1.getOperand(1);
17009  EVT ResTy = N->getValueType(0);
17010  EVT VecResTy;
17011  SDLoc DL(N);
17012 
17013  // The vector width should be 128 bits by the time we get here, even
17014  // if it started as 64 bits (the extract_vector handling will have
17015  // done so). Bail if it is not.
17016  if (Vec.getValueSizeInBits() != 128)
17017  return SDValue();
17018 
17019  if (Vec.getValueType() == MVT::v4i32)
17020  VecResTy = MVT::v4f32;
17021  else if (Vec.getValueType() == MVT::v2i64)
17022  VecResTy = MVT::v2f64;
17023  else
17024  return SDValue();
17025 
17026  SDValue Convert =
17027  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
17028  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
17029 }
17030 
17031 // AArch64 high-vector "long" operations are formed by performing the non-high
17032 // version on an extract_subvector of each operand which gets the high half:
17033 //
17034 // (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
17035 //
17036 // However, there are cases which don't have an extract_high explicitly, but
17037 // have another operation that can be made compatible with one for free. For
17038 // example:
17039 //
17040 // (dupv64 scalar) --> (extract_high (dup128 scalar))
17041 //
17042 // This routine does the actual conversion of such DUPs, once outer routines
17043 // have determined that everything else is in order.
17044 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
17045 // similarly here.
17047  MVT VT = N.getSimpleValueType();
17048  if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17049  N.getConstantOperandVal(1) == 0)
17050  N = N.getOperand(0);
17051 
17052  switch (N.getOpcode()) {
17053  case AArch64ISD::DUP:
17054  case AArch64ISD::DUPLANE8:
17055  case AArch64ISD::DUPLANE16:
17056  case AArch64ISD::DUPLANE32:
17057  case AArch64ISD::DUPLANE64:
17058  case AArch64ISD::MOVI:
17059  case AArch64ISD::MOVIshift:
17060  case AArch64ISD::MOVIedit:
17061  case AArch64ISD::MOVImsl:
17062  case AArch64ISD::MVNIshift:
17063  case AArch64ISD::MVNImsl:
17064  break;
17065  default:
17066  // FMOV could be supported, but isn't very useful, as it would only occur
17067  // if you passed a bitcast' floating point immediate to an eligible long
17068  // integer op (addl, smull, ...).
17069  return SDValue();
17070  }
17071 
17072  if (!VT.is64BitVector())
17073  return SDValue();
17074 
17075  SDLoc DL(N);
17076  unsigned NumElems = VT.getVectorNumElements();
17077  if (N.getValueType().is64BitVector()) {
17078  MVT ElementTy = VT.getVectorElementType();
17079  MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
17080  N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
17081  }
17082 
17083  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N,
17084  DAG.getConstant(NumElems, DL, MVT::i64));
17085 }
17086 
17088  if (N.getOpcode() == ISD::BITCAST)
17089  N = N.getOperand(0);
17090  if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17091  return false;
17092  if (N.getOperand(0).getValueType().isScalableVector())
17093  return false;
17094  return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
17095  N.getOperand(0).getValueType().getVectorNumElements() / 2;
17096 }
17097 
17098 /// Helper structure to keep track of ISD::SET_CC operands.
17100  const SDValue *Opnd0;
17101  const SDValue *Opnd1;
17103 };
17104 
17105 /// Helper structure to keep track of a SET_CC lowered into AArch64 code.
17107  const SDValue *Cmp;
17109 };
17110 
17111 /// Helper structure to keep track of SetCC information.
17112 union SetCCInfo {
17115 };
17116 
17117 /// Helper structure to be able to read SetCC information. If set to
17118 /// true, IsAArch64 field, Info is a AArch64SetCCInfo, otherwise Info is a
17119 /// GenericSetCCInfo.
17123 };
17124 
17125 /// Check whether or not \p Op is a SET_CC operation, either a generic or
17126 /// an
17127 /// AArch64 lowered one.
17128 /// \p SetCCInfo is filled accordingly.
17129 /// \post SetCCInfo is meanginfull only when this function returns true.
17130 /// \return True when Op is a kind of SET_CC operation.
17132  // If this is a setcc, this is straight forward.
17133  if (Op.getOpcode() == ISD::SETCC) {
17134  SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
17135  SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
17136  SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
17137  SetCCInfo.IsAArch64 = false;
17138  return true;
17139  }
17140  // Otherwise, check if this is a matching csel instruction.
17141  // In other words:
17142  // - csel 1, 0, cc
17143  // - csel 0, 1, !cc
17144  if (Op.getOpcode() != AArch64ISD::CSEL)
17145  return false;
17146  // Set the information about the operands.
17147  // TODO: we want the operands of the Cmp not the csel
17148  SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
17149  SetCCInfo.IsAArch64 = true;
17150  SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
17151  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
17152 
17153  // Check that the operands matches the constraints:
17154  // (1) Both operands must be constants.
17155  // (2) One must be 1 and the other must be 0.
17156  ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
17157  ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17158 
17159  // Check (1).
17160  if (!TValue || !FValue)
17161  return false;
17162 
17163  // Check (2).
17164  if (!TValue->isOne()) {
17165  // Update the comparison when we are interested in !cc.
17166  std::swap(TValue, FValue);
17167  SetCCInfo.Info.AArch64.CC =
17169  }
17170  return TValue->isOne() && FValue->isZero();
17171 }
17172 
17173 // Returns true if Op is setcc or zext of setcc.
17175  if (isSetCC(Op, Info))
17176  return true;
17177  return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
17178  isSetCC(Op->getOperand(0), Info));
17179 }
17180 
17181 // The folding we want to perform is:
17182 // (add x, [zext] (setcc cc ...) )
17183 // -->
17184 // (csel x, (add x, 1), !cc ...)
17185 //
17186 // The latter will get matched to a CSINC instruction.
17188  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
17189  SDValue LHS = Op->getOperand(0);
17190  SDValue RHS = Op->getOperand(1);
17191  SetCCInfoAndKind InfoAndKind;
17192 
17193  // If both operands are a SET_CC, then we don't want to perform this
17194  // folding and create another csel as this results in more instructions
17195  // (and higher register usage).
17196  if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
17197  isSetCCOrZExtSetCC(RHS, InfoAndKind))
17198  return SDValue();
17199 
17200  // If neither operand is a SET_CC, give up.
17201  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
17202  std::swap(LHS, RHS);
17203  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
17204  return SDValue();
17205  }
17206 
17207  // FIXME: This could be generatized to work for FP comparisons.
17208  EVT CmpVT = InfoAndKind.IsAArch64
17209  ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
17210  : InfoAndKind.Info.Generic.Opnd0->getValueType();
17211  if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
17212  return SDValue();
17213 
17214  SDValue CCVal;
17215  SDValue Cmp;
17216  SDLoc dl(Op);
17217  if (InfoAndKind.IsAArch64) {
17218  CCVal = DAG.getConstant(
17219  AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
17220  MVT::i32);
17221  Cmp = *InfoAndKind.Info.AArch64.Cmp;
17222  } else
17223  Cmp = getAArch64Cmp(
17224  *InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
17225  ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
17226  dl);
17227 
17228  EVT VT = Op->getValueType(0);
17229  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
17230  return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
17231 }
17232 
17233 // ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
17235  EVT VT = N->getValueType(0);
17236  // Only scalar integer and vector types.
17237  if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
17238  return SDValue();
17239 
17240  SDValue LHS = N->getOperand(0);
17241  SDValue RHS = N->getOperand(1);
17242  if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17243  RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
17244  return SDValue();
17245 
17246  auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
17247  auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
17248  if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
17249  return SDValue();
17250 
17251  SDValue Op1 = LHS->getOperand(0);
17252  SDValue Op2 = RHS->getOperand(0);
17253  EVT OpVT1 = Op1.getValueType();
17254  EVT OpVT2 = Op2.getValueType();
17255  if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
17256  Op2.getOpcode() != AArch64ISD::UADDV ||
17257  OpVT1.getVectorElementType() != VT)
17258  return SDValue();
17259 
17260  SDValue Val1 = Op1.getOperand(0);
17261  SDValue Val2 = Op2.getOperand(0);
17262  EVT ValVT = Val1->getValueType(0);
17263  SDLoc DL(N);
17264  SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
17265  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
17266  DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
17267  DAG.getConstant(0, DL, MVT::i64));
17268 }
17269 
17270 /// Perform the scalar expression combine in the form of:
17271 /// CSEL(c, 1, cc) + b => CSINC(b+c, b, cc)
17272 /// CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
17274  EVT VT = N->getValueType(0);
17275  if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
17276  return SDValue();
17277 
17278  SDValue LHS = N->getOperand(0);
17279  SDValue RHS = N->getOperand(1);
17280 
17281  // Handle commutivity.
17282  if (LHS.getOpcode() != AArch64ISD::CSEL &&
17283  LHS.getOpcode() != AArch64ISD::CSNEG) {
17284  std::swap(LHS, RHS);
17285  if (LHS.getOpcode() != AArch64ISD::CSEL &&
17286  LHS.getOpcode() != AArch64ISD::CSNEG) {
17287  return SDValue();
17288  }
17289  }
17290 
17291  if (!LHS.hasOneUse())
17292  return SDValue();
17293 
17294  AArch64CC::CondCode AArch64CC =
17295  static_cast<AArch64CC::CondCode>(LHS.getConstantOperandVal(2));
17296 
17297  // The CSEL should include a const one operand, and the CSNEG should include
17298  // One or NegOne operand.
17299  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(LHS.getOperand(0));
17300  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
17301  if (!CTVal || !CFVal)
17302  return SDValue();
17303 
17304  if (!(LHS.getOpcode() == AArch64ISD::CSEL &&
17305  (CTVal->isOne() || CFVal->isOne())) &&
17306  !(LHS.getOpcode() == AArch64ISD::CSNEG &&
17307  (CTVal->isOne() || CFVal->isAllOnes())))
17308  return SDValue();
17309 
17310  // Switch CSEL(1, c, cc) to CSEL(c, 1, !cc)
17311  if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
17312  !CFVal->isOne()) {
17313  std::swap(CTVal, CFVal);
17314  AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
17315  }
17316 
17317  SDLoc DL(N);
17318  // Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc)
17319  if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
17320  !CFVal->isAllOnes()) {
17321  APInt C = -1 * CFVal->getAPIntValue();
17322  CTVal = cast<ConstantSDNode>(DAG.getConstant(C, DL, VT));
17323  CFVal = cast<ConstantSDNode>(DAG.getAllOnesConstant(DL, VT));
17324  AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
17325  }
17326 
17327  // It might be neutral for larger constants, as the immediate need to be
17328  // materialized in a register.
17329  APInt ADDC = CTVal->getAPIntValue();
17330  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17331  if (!TLI.isLegalAddImmediate(ADDC.getSExtValue()))
17332  return SDValue();
17333 
17334  assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
17335  (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
17336  "Unexpected constant value");
17337 
17338  SDValue NewNode = DAG.getNode(ISD::ADD, DL, VT, RHS, SDValue(CTVal, 0));
17339  SDValue CCVal = DAG.getConstant(AArch64CC, DL, MVT::i32);
17340  SDValue Cmp = LHS.getOperand(3);
17341 
17342  return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp);
17343 }
17344 
17345 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
17347  EVT VT = N->getValueType(0);
17348  if (N->getOpcode() != ISD::ADD)
17349  return SDValue();
17350 
17351  SDValue Dot = N->getOperand(0);
17352  SDValue A = N->getOperand(1);
17353  // Handle commutivity
17354  auto isZeroDot = [](SDValue Dot) {
17355  return (Dot.getOpcode() == AArch64ISD::UDOT ||
17356  Dot.getOpcode() == AArch64ISD::SDOT) &&
17357  isZerosVector(Dot.getOperand(0).getNode());
17358  };
17359  if (!isZeroDot(Dot))
17360  std::swap(Dot, A);
17361  if (!isZeroDot(Dot))
17362  return SDValue();
17363 
17364  return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
17365  Dot.getOperand(2));
17366 }
17367 
17369  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
17370 }
17371 
17373  SDLoc DL(Op);
17374  EVT VT = Op.getValueType();
17375  SDValue Zero = DAG.getConstant(0, DL, VT);
17376  return DAG.getNode(ISD::SUB, DL, VT, Zero, Op);
17377 }
17378 
17379 // Try to fold
17380 //
17381 // (neg (csel X, Y)) -> (csel (neg X), (neg Y))
17382 //
17383 // The folding helps csel to be matched with csneg without generating
17384 // redundant neg instruction, which includes negation of the csel expansion
17385 // of abs node lowered by lowerABS.
17387  if (!isNegatedInteger(SDValue(N, 0)))
17388  return SDValue();
17389 
17390  SDValue CSel = N->getOperand(1);
17391  if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
17392  return SDValue();
17393 
17394  SDValue N0 = CSel.getOperand(0);
17395  SDValue N1 = CSel.getOperand(1);
17396 
17397  // If both of them is not negations, it's not worth the folding as it
17398  // introduces two additional negations while reducing one negation.
17399  if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
17400  return SDValue();
17401 
17402  SDValue N0N = getNegatedInteger(N0, DAG);
17403  SDValue N1N = getNegatedInteger(N1, DAG);
17404 
17405  SDLoc DL(N);
17406  EVT VT = CSel.getValueType();
17407  return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2),
17408  CSel.getOperand(3));
17409 }
17410 
17411 // The basic add/sub long vector instructions have variants with "2" on the end
17412 // which act on the high-half of their inputs. They are normally matched by
17413 // patterns like:
17414 //
17415 // (add (zeroext (extract_high LHS)),
17416 // (zeroext (extract_high RHS)))
17417 // -> uaddl2 vD, vN, vM
17418 //
17419 // However, if one of the extracts is something like a duplicate, this
17420 // instruction can still be used profitably. This function puts the DAG into a
17421 // more appropriate form for those patterns to trigger.
17424  SelectionDAG &DAG) {
17425  if (DCI.isBeforeLegalizeOps())
17426  return SDValue();
17427 
17428  MVT VT = N->getSimpleValueType(0);
17429  if (!VT.is128BitVector()) {
17430  if (N->getOpcode() == ISD::ADD)
17431  return performSetccAddFolding(N, DAG);
17432  return SDValue();
17433  }
17434 
17435  // Make sure both branches are extended in the same way.
17436  SDValue LHS = N->getOperand(0);
17437  SDValue RHS = N->getOperand(1);
17438  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
17439  LHS.getOpcode() != ISD::SIGN_EXTEND) ||
17440  LHS.getOpcode() != RHS.getOpcode())
17441  return SDValue();
17442 
17443  unsigned ExtType = LHS.getOpcode();
17444 
17445  // It's not worth doing if at least one of the inputs isn't already an
17446  // extract, but we don't know which it'll be so we have to try both.
17447  if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
17448  RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
17449  if (!RHS.getNode())
17450  return SDValue();
17451 
17452  RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
17453  } else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
17454  LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
17455  if (!LHS.getNode())
17456  return SDValue();
17457 
17458  LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
17459  }
17460 
17461  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
17462 }
17463 
17464 static bool isCMP(SDValue Op) {
17465  return Op.getOpcode() == AArch64ISD::SUBS &&
17466  !Op.getNode()->hasAnyUseOfValue(0);
17467 }
17468 
17469 // (CSEL 1 0 CC Cond) => CC
17470 // (CSEL 0 1 CC Cond) => !CC
17471 static std::optional<AArch64CC::CondCode> getCSETCondCode(SDValue Op) {
17472  if (Op.getOpcode() != AArch64ISD::CSEL)
17473  return std::nullopt;
17474  auto CC = static_cast<AArch64CC::CondCode>(Op.getConstantOperandVal(2));
17475  if (CC == AArch64CC::AL || CC == AArch64CC::NV)
17476  return std::nullopt;
17477  SDValue OpLHS = Op.getOperand(0);
17478  SDValue OpRHS = Op.getOperand(1);
17479  if (isOneConstant(OpLHS) && isNullConstant(OpRHS))
17480  return CC;
17481  if (isNullConstant(OpLHS) && isOneConstant(OpRHS))
17482  return getInvertedCondCode(CC);
17483 
17484  return std::nullopt;
17485 }
17486 
17487 // (ADC{S} l r (CMP (CSET HS carry) 1)) => (ADC{S} l r carry)
17488 // (SBC{S} l r (CMP 0 (CSET LO carry))) => (SBC{S} l r carry)
17489 static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd) {
17490  SDValue CmpOp = Op->getOperand(2);
17491  if (!isCMP(CmpOp))
17492  return SDValue();
17493 
17494  if (IsAdd) {
17495  if (!isOneConstant(CmpOp.getOperand(1)))
17496  return SDValue();
17497  } else {
17498  if (!isNullConstant(CmpOp.getOperand(0)))
17499  return SDValue();
17500  }
17501 
17502  SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
17503  auto CC = getCSETCondCode(CsetOp);
17504  if (CC != (IsAdd ? AArch64CC::HS : AArch64CC::LO))
17505  return SDValue();
17506 
17507  return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(),
17508  Op->getOperand(0), Op->getOperand(1),
17509  CsetOp.getOperand(3));
17510 }
17511 
17512 // (ADC x 0 cond) => (CINC x HS cond)
17514  SDValue LHS = N->getOperand(0);
17515  SDValue RHS = N->getOperand(1);
17516  SDValue Cond = N->getOperand(2);
17517 
17518  if (!isNullConstant(RHS))
17519  return SDValue();
17520 
17521  EVT VT = N->getValueType(0);
17522  SDLoc DL(N);
17523 
17524  // (CINC x cc cond) <=> (CSINC x x !cc cond)
17526  return DAG.getNode(AArch64ISD::CSINC, DL, VT, LHS, LHS, CC, Cond);
17527 }
17528 
17529 // Transform vector add(zext i8 to i32, zext i8 to i32)
17530 // into sext(add(zext(i8 to i16), zext(i8 to i16)) to i32)
17531 // This allows extra uses of saddl/uaddl at the lower vector widths, and less
17532 // extends.
17534  EVT VT = N->getValueType(0);
17535  if (!VT.isFixedLengthVector() || VT.getSizeInBits() <= 128 ||
17536  (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
17537  N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) ||
17538  (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
17539  N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) ||
17540  N->getOperand(0).getOperand(0).getValueType() !=
17541  N->getOperand(1).getOperand(0).getValueType())
17542  return SDValue();
17543 
17544  SDValue N0 = N->getOperand(0).getOperand(0);
17545  SDValue N1 = N->getOperand(1).getOperand(0);
17546  EVT InVT = N0.getValueType();
17547 
17548  EVT S1 = InVT.getScalarType();
17549  EVT S2 = VT.getScalarType();
17550  if ((S2 == MVT::i32 && S1 == MVT::i8) ||
17551  (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
17552  SDLoc DL(N);
17553  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
17554  S2.getHalfSizedIntegerVT(*DAG.getContext()),
17555  VT.getVectorElementCount());
17556  SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0);
17557  SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1);
17558  SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1);
17559  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewOp);
17560  }
17561  return SDValue();
17562 }
17563 
                                          SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // A build vector of two extracted elements is equivalent to an
  // extract subvector where the inner vector is any-extended to the
  // extract_vector_elt VT.
  //    (build_vector (extract_elt_iXX_to_i32 vec Idx+0)
  //                  (extract_elt_iXX_to_i32 vec Idx+1))
  // => (extract_subvector (anyext_iXX_to_i32 vec) Idx)

  // For now, only consider the v2i32 case, which arises as a result of
  // legalization.
  if (VT != MVT::v2i32)
    return SDValue();

  SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
  // Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT.
  if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      // Constant index.
      isa<ConstantSDNode>(Elt0->getOperand(1)) &&
      isa<ConstantSDNode>(Elt1->getOperand(1)) &&
      // Both EXTRACT_VECTOR_ELT from same vector...
      Elt0->getOperand(0) == Elt1->getOperand(0) &&
      // ... and contiguous. First element's index +1 == second element's index.
      Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1) &&
      // EXTRACT_SUBVECTOR requires that Idx be a constant multiple of
      // ResultType's known minimum vector length.
      Elt0->getConstantOperandVal(1) % VT.getVectorMinNumElements() == 0) {
    SDValue VecToExtend = Elt0->getOperand(0);
    // The any-extended source vector must itself be a legal type, otherwise
    // this transform would introduce an illegal node after legalization.
    EVT ExtVT = VecToExtend.getValueType().changeVectorElementType(MVT::i32);
    if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT))
      return SDValue();

    SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);

    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);
                       SubvectorIdx);
  }

  return SDValue();
}
17610 
                                       SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  // Push a truncate through a single-use DUP: truncating the scalar input and
  // re-duplicating at the narrow type avoids a wide DUP followed by a narrow.
  if (VT.isFixedLengthVector() && VT.is64BitVector() && N0.hasOneUse() &&
      N0.getOpcode() == AArch64ISD::DUP) {
    SDValue Op = N0.getOperand(0);
    // Narrow the duplicated scalar itself when the result element type is i32.
    if (VT.getScalarType() == MVT::i32 &&
      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i32, Op);
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Op);
  }

  return SDValue();
}
17626 
// Check whether a node is an extend or shift operand.
  unsigned Opcode = N.getOpcode();
  // Any flavor of extension counts, provided the source is i8/i16/i32.
  if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_INREG ||
      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND) {
    EVT SrcVT;
    if (Opcode == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    return SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8;
  } else if (Opcode == ISD::AND) {
    // An AND with a low-bits mask (0xff/0xffff/0xffffffff) behaves like a
    // zero-extension from i8/i16/i32.
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    uint64_t AndMask = CSD->getZExtValue();
    return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
  } else if (Opcode == ISD::SHL || Opcode == ISD::SRL || Opcode == ISD::SRA) {
    // Shifts only qualify with a constant shift amount.
    return isa<ConstantSDNode>(N.getOperand(1));
  }

  return false;
}
17651 
// (N - Y) + Z --> (Z - Y) + N
// when N is an extend or shift operand
                                         SelectionDAG &DAG) {
  // A value that qualifies for this rewrite: a single-use extend/shift.
  auto IsOneUseExtend = [](SDValue N) {
    return N.hasOneUse() && isExtendOrShiftOperand(N);
  };

  // DAGCombiner will revert the combination when Z is constant, causing an
  // infinite loop. So don't enable the combination when Z is constant.
  // If Z is a one-use shift/extend, we also can't do the optimization;
  // it would likewise fall into an infinite self-loop.
  if (isa<ConstantSDNode>(Z) || IsOneUseExtend(Z))
    return SDValue();

  // Only rewrite a single-use SUB, so no extra SUB node survives.
  if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
    return SDValue();

  // The subtrahend's LHS must be the extend/shift we want to expose to the
  // final ADD (so it can fold into a shifted/extended-register form).
  SDValue Shift = SUB.getOperand(0);
  if (!IsOneUseExtend(Shift))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // (Shift - Y) + Z  ==>  (Z - Y) + Shift
  SDValue Y = SUB.getOperand(1);
  SDValue NewSub = DAG.getNode(ISD::SUB, DL, VT, Z, Y);
  return DAG.getNode(ISD::ADD, DL, VT, NewSub, Shift);
}
17681 
                                                   SelectionDAG &DAG) {
  // NOTE: Swapping LHS and RHS is not done for SUB, since SUB is not
  // commutative.
  if (N->getOpcode() != ISD::ADD)
    return SDValue();

  // Bail out when value type is not one of {i32, i64}, since AArch64 ADD with
  // shifted register is only available for i32 and i64.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return SDValue();

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // First try the (sub - shift) reassociation in either operand order.
  if (SDValue Val = performAddCombineSubShift(N, LHS, RHS, DAG))
    return Val;
  if (SDValue Val = performAddCombineSubShift(N, RHS, LHS, DAG))
    return Val;

  uint64_t LHSImm = 0, RHSImm = 0;
  // If both operand are shifted by imm and shift amount is not greater than 4
  // for one operand, swap LHS and RHS to put operand with smaller shift amount
  // on RHS.
  //
  // On many AArch64 processors (Cortex A78, Neoverse N1/N2/V1, etc), ADD with
  // LSL shift (shift <= 4) has smaller latency and larger throughput than ADD
  // with LSL (shift > 4). For the rest of processors, this is no-op for
  // performance or correctness.
  if (isOpcWithIntImmediate(LHS.getNode(), ISD::SHL, LHSImm) &&
      isOpcWithIntImmediate(RHS.getNode(), ISD::SHL, RHSImm) && LHSImm <= 4 &&
      RHSImm > 4 && LHS.hasOneUse())
    return DAG.getNode(ISD::ADD, DL, VT, RHS, LHS);

  return SDValue();
}
17720 
// The mid end will reassociate sub(sub(x, m1), m2) to sub(x, add(m1, m2))
// This reassociates it back to allow the creation of more mls instructions.
  if (N->getOpcode() != ISD::SUB)
    return SDValue();

  SDValue Add = N->getOperand(1);
  SDValue X = N->getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  // Only rewrite a single-use add, so the original add node dies.
  if (!Add.hasOneUse())
    return SDValue();
    return SDValue();

  // Both addends must be multiplies (plain or widening) so each resulting
  // sub can become an mls/smlsl/umlsl.
  SDValue M1 = Add.getOperand(0);
  SDValue M2 = Add.getOperand(1);
  if (M1.getOpcode() != ISD::MUL && M1.getOpcode() != AArch64ISD::SMULL &&
      M1.getOpcode() != AArch64ISD::UMULL)
    return SDValue();
  if (M2.getOpcode() != ISD::MUL && M2.getOpcode() != AArch64ISD::SMULL &&
      M2.getOpcode() != AArch64ISD::UMULL)
    return SDValue();

  // sub(x, add(m1, m2)) -> sub(sub(x, m1), m2)
  EVT VT = N->getValueType(0);
  SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, X, M1);
  return DAG.getNode(ISD::SUB, SDLoc(N), VT, Sub, M2);
}
17750 
                                 SelectionDAG &DAG) {
  // Dispatch to the individual add/sub combines in priority order; the first
  // one that produces a node wins.
  // Try to change sum of two reductions.
  if (SDValue Val = performAddUADDVCombine(N, DAG))
    return Val;
  if (SDValue Val = performAddDotCombine(N, DAG))
    return Val;
  if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
    return Val;
  if (SDValue Val = performNegCSelCombine(N, DAG))
    return Val;
  if (SDValue Val = performVectorAddSubExtCombine(N, DAG))
    return Val;
    return Val;
  if (SDValue Val = performSubAddMULCombine(N, DAG))
    return Val;

  // Fall back to the generic long-operation combine.
  return performAddSubLongCombine(N, DCI, DAG);
}
17772 
// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.
//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
//   (aarch64_neon_umull (extract_high (v2i64 vec)))
//                     (extract_high (v2i64 (dup128 scalar)))))
//
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
                                       SelectionDAG &DAG) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // For a plain ISD node the data operands are 0/1; for an intrinsic node
  // operand 0 is the intrinsic ID, so the data operands shift to 1/2.
  SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
  SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
  assert(LHS.getValueType().is64BitVector() &&
         RHS.getValueType().is64BitVector() &&
         "unexpected shape for long operation");

  // Either node could be a DUP, but it's not worth doing both of them (you'd
  // just as well use the non-high version) so look for a corresponding extract
  // operation on the other "wing".
    if (!RHS.getNode())
      return SDValue();
  } else if (isEssentiallyExtractHighSubvector(RHS)) {
    if (!LHS.getNode())
      return SDValue();
  }

  // Rebuild the node with the (possibly) replaced operands, preserving the
  // intrinsic wrapper when there is one.
  if (IID == Intrinsic::not_intrinsic)
    return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);

  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
                     N->getOperand(0), LHS, RHS);
}
17811 
17812 static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
17813  MVT ElemTy = N->getSimpleValueType(0).getScalarType();
17814  unsigned ElemBits = ElemTy.getSizeInBits();
17815 
17816  int64_t ShiftAmount;
17817  if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
17818  APInt SplatValue, SplatUndef;
17819  unsigned SplatBitSize;
17820  bool HasAnyUndefs;
17821  if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
17822  HasAnyUndefs, ElemBits) ||
17823  SplatBitSize != ElemBits)
17824  return SDValue();
17825 
17826  ShiftAmount = SplatValue.getSExtValue();
17827  } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17828  ShiftAmount = CVN->getSExtValue();
17829  } else
17830  return SDValue();
17831 
17832  unsigned Opcode;
17833  bool IsRightShift;
17834  switch (IID) {
17835  default:
17836  llvm_unreachable("Unknown shift intrinsic");
17837  case Intrinsic::aarch64_neon_sqshl:
17838  Opcode = AArch64ISD::SQSHL_I;
17839  IsRightShift = false;
17840  break;
17841  case Intrinsic::aarch64_neon_uqshl:
17842  Opcode = AArch64ISD::UQSHL_I;
17843  IsRightShift = false;
17844  break;
17845  case Intrinsic::aarch64_neon_srshl:
17846  Opcode = AArch64ISD::SRSHR_I;
17847  IsRightShift = true;
17848  break;
17849  case Intrinsic::aarch64_neon_urshl:
17850  Opcode = AArch64ISD::URSHR_I;
17851  IsRightShift = true;
17852  break;
17853  case Intrinsic::aarch64_neon_sqshlu:
17854  Opcode = AArch64ISD::SQSHLU_I;
17855  IsRightShift = false;
17856  break;
17857  case Intrinsic::aarch64_neon_sshl:
17858  case Intrinsic::aarch64_neon_ushl:
17859  // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
17860  // left shift for positive shift amounts. Below, we only replace the current
17861  // node with VSHL, if this condition is met.
17862  Opcode = AArch64ISD::VSHL;
17863  IsRightShift = false;
17864  break;
17865  }
17866 
17867  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
17868  SDLoc dl(N);
17869  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
17870  DAG.getConstant(-ShiftAmount, dl, MVT::i32));
17871  } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
17872  SDLoc dl(N);
17873  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
17874  DAG.getConstant(ShiftAmount, dl, MVT::i32));
17875  }
17876 
17877  return SDValue();
17878 }
17879 
// The CRC32[BH] instructions ignore the high bits of their data operand. Since
// the intrinsics must be legal and take an i32, this means there's almost
// certainly going to be a zext in the DAG which we can eliminate.
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
  // Operand 2 is the data operand of the CRC32 intrinsic node.
  SDValue AndN = N->getOperand(2);
  if (AndN.getOpcode() != ISD::AND)
    return SDValue();

  // Only drop the AND when it masks exactly the bits CRC32[BH] ignores.
  ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
  if (!CMask || CMask->getZExtValue() != Mask)
    return SDValue();

                     N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
17895 
                                   SelectionDAG &DAG) {
  SDLoc dl(N);
  // Rebuild the across-lanes operation as the target node Opc on the full
  // vector, then extract lane 0, where the result lives.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
                     DAG.getNode(Opc, dl,
                                 N->getOperand(1).getSimpleValueType(),
                                 N->getOperand(1)),
                     DAG.getConstant(0, dl, MVT::i64));
}
17905 
  SDLoc DL(N);
  SDValue Op1 = N->getOperand(1);      // base
  SDValue Op2 = N->getOperand(2);      // step
  EVT ScalarTy = Op2.getValueType();
  // Promote sub-i32 scalar types to i32.
  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
    ScalarTy = MVT::i32;

  // Lower index_vector(base, step) to mul(step step_vector(1)) + splat(base).
  SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
  SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
  SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
  SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
  return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
}
17921 
  SDLoc dl(N);
  SDValue Scalar = N->getOperand(3);
  EVT ScalarTy = Scalar.getValueType();

  // Widen sub-i32 scalars to i32 before duplication.
  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
    Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);

  // Lower to the predicated DUP node: inactive lanes come from Passthru.
  SDValue Passthru = N->getOperand(1);
  SDValue Pred = N->getOperand(2);
  return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
                     Pred, Scalar, Passthru);
}
17935 
  SDLoc dl(N);
  LLVMContext &Ctx = *DAG.getContext();
  EVT VT = N->getValueType(0);

  assert(VT.isScalableVector() && "Expected a scalable vector.");

  // Current lowering only supports the SVE-ACLE types.
    return SDValue();

  // Element size and total size in bytes, used to rescale the index into
  // EXT's byte-granular domain.
  unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
  unsigned ByteSize = VT.getSizeInBits().getKnownMinValue() / 8;
  EVT ByteVT =

  // Convert everything to the domain of EXT (i.e bytes).
  SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
  SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
                            DAG.getConstant(ElemSize, dl, MVT::i32));

  // Perform the byte-wise extract, then cast back to the original type.
  SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
  return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
}
17961 
                                        SelectionDAG &DAG) {
  if (DCI.isBeforeLegalize())
    return SDValue();

  // Only fold when the wide comparator is a splatted scalar, so the compare
  // can be rewritten as a regular SETCC_MERGE_ZERO against a splat immediate.
  SDValue Comparator = N->getOperand(3);
  if (Comparator.getOpcode() == AArch64ISD::DUP ||
      Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
    unsigned IID = getIntrinsicID(N);
    EVT VT = N->getValueType(0);
    EVT CmpVT = N->getOperand(2).getValueType();
    SDValue Pred = N->getOperand(1);
    SDValue Imm;
    SDLoc DL(N);

    switch (IID) {
    default:
      llvm_unreachable("Called with wrong intrinsic!");
      break;

    // Signed comparisons
    case Intrinsic::aarch64_sve_cmpeq_wide:
    case Intrinsic::aarch64_sve_cmpne_wide:
    case Intrinsic::aarch64_sve_cmpge_wide:
    case Intrinsic::aarch64_sve_cmpgt_wide:
    case Intrinsic::aarch64_sve_cmplt_wide:
    case Intrinsic::aarch64_sve_cmple_wide: {
      // Signed forms accept immediates in [-16, 15].
      if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
        int64_t ImmVal = CN->getSExtValue();
        if (ImmVal >= -16 && ImmVal <= 15)
          Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
        else
          return SDValue();
      }
      break;
    }
    // Unsigned comparisons
    case Intrinsic::aarch64_sve_cmphs_wide:
    case Intrinsic::aarch64_sve_cmphi_wide:
    case Intrinsic::aarch64_sve_cmplo_wide:
    case Intrinsic::aarch64_sve_cmpls_wide: {
      // Unsigned forms accept immediates in [0, 127].
      if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
        uint64_t ImmVal = CN->getZExtValue();
        if (ImmVal <= 127)
          Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
        else
          return SDValue();
      }
      break;
    }
    }

    // No encodable immediate found (e.g. the splatted operand was not a
    // constant, or it was out of range).
    if (!Imm)
      return SDValue();

    SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
    return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
                       N->getOperand(2), Splat, DAG.getCondCode(CC));
  }

  return SDValue();
}
18025 
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  SDLoc DL(Op);
  assert(Op.getValueType().isScalableVector() &&
         TLI.isTypeLegal(Op.getValueType()) &&
         "Expected legal scalable vector type!");
  assert(Op.getValueType() == Pg.getValueType() &&
         "Expected same type for PTEST operands");

  // Ensure target specific opcodes are using legal type.
  EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  // Materialize the two possible scalar results of the test up front.
  SDValue TVal = DAG.getConstant(1, DL, OutVT);
  SDValue FVal = DAG.getConstant(0, DL, OutVT);

  // Ensure operands have type nxv16i1.
  if (Op.getValueType() != MVT::nxv16i1) {
    else
      Pg = getSVEPredicateBitCast(MVT::nxv16i1, Pg, DAG);
  }

  // Set condition code (CC) flags.
  SDValue Test = DAG.getNode(
      DL, MVT::Other, Pg, Op);

  // Convert CC to integer based on requested condition.
  // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
  SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
  return DAG.getZExtOrTrunc(Res, DL, VT);
}
18063 
18064 static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
18065  SelectionDAG &DAG) {
18066  SDLoc DL(N);
18067 
18068  SDValue Pred = N->getOperand(1);
18069  SDValue VecToReduce = N->getOperand(2);
18070 
18071  // NOTE: The integer reduction's result type is not always linked to the
18072  // operand's element type so we construct it from the intrinsic's result type.
18073  EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
18074  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
18075 
18076  // SVE reductions set the whole vector register with the first element
18077  // containing the reduction result, which we'll now extract.
18078  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18079  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
18080  Zero);
18081 }
18082 
18083 static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
18084  SelectionDAG &DAG) {
18085  SDLoc DL(N);
18086 
18087  SDValue Pred = N->getOperand(1);
18088  SDValue VecToReduce = N->getOperand(2);
18089 
18090  EVT ReduceVT = VecToReduce.getValueType();
18091  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
18092 
18093  // SVE reductions set the whole vector register with the first element
18094  // containing the reduction result, which we'll now extract.
18095  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18096  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
18097  Zero);
18098 }
18099 
                                            SelectionDAG &DAG) {
  // Lower an ordered FP reduction: seed lane 0 of the accumulator vector with
  // the initial value, run the target reduction node, then extract lane 0.
  SDLoc DL(N);

  SDValue Pred = N->getOperand(1);
  SDValue InitVal = N->getOperand(2);
  SDValue VecToReduce = N->getOperand(3);
  EVT ReduceVT = VecToReduce.getValueType();

  // Ordered reductions use the first lane of the result vector as the
  // reduction's initial value.
  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
  InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
                        DAG.getUNDEF(ReduceVT), InitVal, Zero);

  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);

  // SVE reductions set the whole vector register with the first element
  // containing the reduction result, which we'll now extract.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
                     Zero);
}
18122 
18123 // If a merged operation has no inactive lanes we can relax it to a predicated
18124 // or unpredicated operation, which potentially allows better isel (perhaps
18125 // using immediate forms) or relaxing register reuse requirements.
18126 static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
18127  SelectionDAG &DAG, bool UnpredOp = false,
18128  bool SwapOperands = false) {
18129  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
18130  assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
18131  SDValue Pg = N->getOperand(1);
18132  SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
18133  SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
18134 
18135  // ISD way to specify an all active predicate.
18136  if (isAllActivePredicate(DAG, Pg)) {
18137  if (UnpredOp)
18138  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
18139 
18140  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
18141  }
18142 
18143  // FUTURE: SplatVector(true)
18144  return SDValue();
18145 }
18146 
18149  const AArch64Subtarget *Subtarget) {
18150  SelectionDAG &DAG = DCI.DAG;
18151  unsigned IID = getIntrinsicID(N);
18152  switch (IID) {
18153  default:
18154  break;
18155  case Intrinsic::get_active_lane_mask: {
18156  SDValue Res = SDValue();
18157  EVT VT = N->getValueType(0);
18158  if (VT.isFixedLengthVector()) {
18159  // We can use the SVE whilelo instruction to lower this intrinsic by
18160  // creating the appropriate sequence of scalable vector operations and
18161  // then extracting a fixed-width subvector from the scalable vector.
18162 
18163  SDLoc DL(N);
18164  SDValue ID =
18165  DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
18166 
18167  EVT WhileVT = EVT::getVectorVT(
18168  *DAG.getContext(), MVT::i1,
18170 
18171  // Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
18172  EVT PromVT = getPromotedVTForPredicate(WhileVT);
18173 
18174  // Get the fixed-width equivalent of PromVT for extraction.
18175  EVT ExtVT =
18177  VT.getVectorElementCount());
18178 
18179  Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
18180  N->getOperand(1), N->getOperand(2));
18181  Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
18182  Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
18183  DAG.getConstant(0, DL, MVT::i64));
18184  Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
18185  }
18186  return Res;
18187  }
18188  case Intrinsic::aarch64_neon_vcvtfxs2fp:
18189  case Intrinsic::aarch64_neon_vcvtfxu2fp:
18190  return tryCombineFixedPointConvert(N, DCI, DAG);
18191  case Intrinsic::aarch64_neon_saddv:
18193  case Intrinsic::aarch64_neon_uaddv:
18195  case Intrinsic::aarch64_neon_sminv:
18197  case Intrinsic::aarch64_neon_uminv:
18199  case Intrinsic::aarch64_neon_smaxv:
18201  case Intrinsic::aarch64_neon_umaxv:
18203  case Intrinsic::aarch64_neon_fmax:
18204  return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
18205  N->getOperand(1), N->getOperand(2));
18206  case Intrinsic::aarch64_neon_fmin:
18207  return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
18208  N->getOperand(1), N->getOperand(2));
18209  case Intrinsic::aarch64_neon_fmaxnm:
18210  return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
18211  N->getOperand(1), N->getOperand(2));
18212  case Intrinsic::aarch64_neon_fminnm:
18213  return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
18214  N->getOperand(1), N->getOperand(2));
18215  case Intrinsic::aarch64_neon_smull:
18216  return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
18217  N->getOperand(1), N->getOperand(2));
18218  case Intrinsic::aarch64_neon_umull:
18219  return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
18220  N->getOperand(1), N->getOperand(2));
18221  case Intrinsic::aarch64_neon_pmull:
18222  return DAG.getNode(AArch64ISD::PMULL, SDLoc(N), N->getValueType(0),
18223  N->getOperand(1), N->getOperand(2));
18224  case Intrinsic::aarch64_neon_sqdmull:
18225  return tryCombineLongOpWithDup(IID, N, DCI, DAG);
18226  case Intrinsic::aarch64_neon_sqshl:
18227  case Intrinsic::aarch64_neon_uqshl:
18228  case Intrinsic::aarch64_neon_sqshlu:
18229  case Intrinsic::aarch64_neon_srshl:
18230  case Intrinsic::aarch64_neon_urshl:
18231  case Intrinsic::aarch64_neon_sshl:
18232  case Intrinsic::aarch64_neon_ushl:
18233  return tryCombineShiftImm(IID, N, DAG);
18234  case Intrinsic::aarch64_neon_rshrn: {
18235  EVT VT = N->getOperand(1).getValueType();
18236  SDLoc DL(N);
18237  SDValue Imm =
18238  DAG.getConstant(1LLU << (N->getConstantOperandVal(2) - 1), DL, VT);
18239  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(1), Imm);
18240  SDValue Sht =
18241  DAG.getNode(ISD::SRL, DL, VT, Add,
18242  DAG.getConstant(N->getConstantOperandVal(2), DL, VT));
18243  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Sht);
18244  }
18245  case Intrinsic::aarch64_neon_sabd:
18246  return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
18247  N->getOperand(1), N->getOperand(2));
18248  case Intrinsic::aarch64_neon_uabd:
18249  return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
18250  N->getOperand(1), N->getOperand(2));
18251  case Intrinsic::aarch64_crc32b:
18252  case Intrinsic::aarch64_crc32cb:
18253  return tryCombineCRC32(0xff, N, DAG);
18254  case Intrinsic::aarch64_crc32h:
18255  case Intrinsic::aarch64_crc32ch:
18256  return tryCombineCRC32(0xffff, N, DAG);
18257  case Intrinsic::aarch64_sve_saddv:
18258  // There is no i64 version of SADDV because the sign is irrelevant.
18259  if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
18261  else
18263  case Intrinsic::aarch64_sve_uaddv:
18265  case Intrinsic::aarch64_sve_smaxv:
18267  case Intrinsic::aarch64_sve_umaxv:
18269  case Intrinsic::aarch64_sve_sminv:
18271  case Intrinsic::aarch64_sve_uminv:
18273  case Intrinsic::aarch64_sve_orv:
18275  case Intrinsic::aarch64_sve_eorv:
18277  case Intrinsic::aarch64_sve_andv:
18279  case Intrinsic::aarch64_sve_index:
18280  return LowerSVEIntrinsicIndex(N, DAG);
18281  case Intrinsic::aarch64_sve_dup:
18282  return LowerSVEIntrinsicDUP(N, DAG);
18283  case Intrinsic::aarch64_sve_dup_x:
18284  return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
18285  N->getOperand(1));
18286  case Intrinsic::aarch64_sve_ext:
18287  return LowerSVEIntrinsicEXT(N, DAG);
18288  case Intrinsic::aarch64_sve_mul:
18290  case Intrinsic::aarch64_sve_mul_u:
18291  return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
18292  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18293  case Intrinsic::aarch64_sve_smulh:
18295  case Intrinsic::aarch64_sve_smulh_u:
18296  return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
18297  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18298  case Intrinsic::aarch64_sve_umulh:
18300  case Intrinsic::aarch64_sve_umulh_u:
18301  return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
18302  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18303  case Intrinsic::aarch64_sve_smin:
18305  case Intrinsic::aarch64_sve_smin_u:
18306  return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
18307  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18308  case Intrinsic::aarch64_sve_umin:
18310  case Intrinsic::aarch64_sve_umin_u:
18311  return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
18312  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18313  case Intrinsic::aarch64_sve_smax:
18315  case Intrinsic::aarch64_sve_smax_u:
18316  return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
18317  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18318  case Intrinsic::aarch64_sve_umax:
18320  case Intrinsic::aarch64_sve_umax_u:
18321  return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
18322  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18323  case Intrinsic::aarch64_sve_lsl:
18325  case Intrinsic::aarch64_sve_lsl_u:
18326  return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
18327  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18328  case Intrinsic::aarch64_sve_lsr:
18330  case Intrinsic::aarch64_sve_lsr_u:
18331  return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
18332  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18333  case Intrinsic::aarch64_sve_asr:
18335  case Intrinsic::aarch64_sve_asr_u:
18336  return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
18337  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18338  case Intrinsic::aarch64_sve_fadd:
18340  case Intrinsic::aarch64_sve_fsub:
18342  case Intrinsic::aarch64_sve_fmul:
18344  case Intrinsic::aarch64_sve_add:
18345  return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
18346  case Intrinsic::aarch64_sve_add_u:
18347  return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2),
18348  N->getOperand(3));
18349  case Intrinsic::aarch64_sve_sub:
18350  return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
18351  case Intrinsic::aarch64_sve_sub_u:
18352  return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2),
18353  N->getOperand(3));
18354  case Intrinsic::aarch64_sve_subr:
18355  return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
18356  case Intrinsic::aarch64_sve_and:
18357  return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
18358  case Intrinsic::aarch64_sve_and_u:
18359  return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), N->getOperand(2),
18360  N->getOperand(3));
18361  case Intrinsic::aarch64_sve_bic:
18362  return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
18363  case Intrinsic::aarch64_sve_bic_u:
18364  return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
18365  N->getOperand(2), N->getOperand(3));
18366  case Intrinsic::aarch64_sve_eor:
18367  return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
18368  case Intrinsic::aarch64_sve_eor_u:
18369  return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2),
18370  N->getOperand(3));
18371  case Intrinsic::aarch64_sve_orr:
18372  return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
18373  case Intrinsic::aarch64_sve_orr_u:
18374  return DAG.getNode(ISD::OR, SDLoc(N), N->getValueType(0), N->getOperand(2),
18375  N->getOperand(3));
18376  case Intrinsic::aarch64_sve_sabd:
18377  return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true);
18378  case Intrinsic::aarch64_sve_sabd_u:
18379  return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
18380  N->getOperand(2), N->getOperand(3));
18381  case Intrinsic::aarch64_sve_uabd:
18382  return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true);
18383  case Intrinsic::aarch64_sve_uabd_u:
18384  return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
18385  N->getOperand(2), N->getOperand(3));
18386  case Intrinsic::aarch64_sve_sdiv_u:
18387  return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
18388  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18389  case Intrinsic::aarch64_sve_udiv_u:
18390  return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
18391  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18392  case Intrinsic::aarch64_sve_sqadd:
18393  return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
18394  case Intrinsic::aarch64_sve_sqsub:
18395  return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
18396  case Intrinsic::aarch64_sve_uqadd:
18397  return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
18398  case Intrinsic::aarch64_sve_uqsub:
18399  return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
18400  case Intrinsic::aarch64_sve_sqadd_x:
18401  return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
18402  N->getOperand(1), N->getOperand(2));
18403  case Intrinsic::aarch64_sve_sqsub_x:
18404  return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
18405  N->getOperand(1), N->getOperand(2));
18406  case Intrinsic::aarch64_sve_uqadd_x:
18407  return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
18408  N->getOperand(1), N->getOperand(2));
18409  case Intrinsic::aarch64_sve_uqsub_x:
18410  return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
18411  N->getOperand(1), N->getOperand(2));
18412  case Intrinsic::aarch64_sve_asrd:
18413  return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
18414  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18415  case Intrinsic::aarch64_sve_cmphs:
18416  if (!N->getOperand(2).getValueType().isFloatingPoint())
18418  N->getValueType(0), N->getOperand(1), N->getOperand(2),
18419  N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
18420  break;
18421  case Intrinsic::aarch64_sve_cmphi:
18422  if (!N->getOperand(2).getValueType().isFloatingPoint())
18424  N->getValueType(0), N->getOperand(1), N->getOperand(2),
18425  N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
18426  break;
18427  case Intrinsic::aarch64_sve_fcmpge:
18428  case Intrinsic::aarch64_sve_cmpge:
18430  N->getValueType(0), N->getOperand(1), N->getOperand(2),
18431  N->getOperand(3), DAG.getCondCode(ISD::SETGE));
18432  break;
18433  case Intrinsic::aarch64_sve_fcmpgt:
18434  case Intrinsic::aarch64_sve_cmpgt:
18436  N->getValueType(0), N->getOperand(1), N->getOperand(2),
18437  N->getOperand(3), DAG.getCondCode(ISD::SETGT));
18438  break;
18439  case Intrinsic::aarch64_sve_fcmpeq:
18440  case Intrinsic::aarch64_sve_cmpeq:
18442  N->getValueType(0), N->getOperand(1), N->getOperand(2),
18443  N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
18444  break;
18445  case Intrinsic::aarch64_sve_fcmpne:
18446  case Intrinsic::aarch64_sve_cmpne:
18448  N->getValueType(0), N->getOperand(1), N->getOperand(2),
18449  N->getOperand(3), DAG.getCondCode(ISD::SETNE));
18450  break;
18451  case Intrinsic::aarch64_sve_fcmpuo:
18453  N->getValueType(0), N->getOperand(1), N->getOperand(2),
18454  N->getOperand(3), DAG.getCondCode(ISD::SETUO));
18455  break;
18456  case Intrinsic::aarch64_sve_fadda:
18458  case Intrinsic::aarch64_sve_faddv:
18460  case Intrinsic::aarch64_sve_fmaxnmv:
18462  case Intrinsic::aarch64_sve_fmaxv:
18464  case Intrinsic::aarch64_sve_fminnmv:
18466  case Intrinsic::aarch64_sve_fminv:
18468  case Intrinsic::aarch64_sve_sel:
18469  return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
18470  N->getOperand(1), N->getOperand(2), N->getOperand(3));
18471  case Intrinsic::aarch64_sve_cmpeq_wide:
18472  return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
18473  case Intrinsic::aarch64_sve_cmpne_wide:
18474  return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
18475  case Intrinsic::aarch64_sve_cmpge_wide:
18476  return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
18477  case Intrinsic::aarch64_sve_cmpgt_wide:
18478  return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
18479  case Intrinsic::aarch64_sve_cmplt_wide:
18480  return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
18481  case Intrinsic::aarch64_sve_cmple_wide:
18482  return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
18483  case Intrinsic::aarch64_sve_cmphs_wide:
18484  return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
18485  case Intrinsic::aarch64_sve_cmphi_wide:
18486  return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
18487  case Intrinsic::aarch64_sve_cmplo_wide:
18488  return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
18489  case Intrinsic::aarch64_sve_cmpls_wide:
18490  return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
18491  case Intrinsic::aarch64_sve_ptest_any:
18492  return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
18494  case Intrinsic::aarch64_sve_ptest_first:
18495  return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
18497  case Intrinsic::aarch64_sve_ptest_last:
18498  return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
18500  }
18501  return SDValue();
18502 }
18503 
18504 static bool isCheapToExtend(const SDValue &N) {
18505  unsigned OC = N->getOpcode();
18506  return OC == ISD::LOAD || OC == ISD::MLOAD ||
18508 }
18509 
18510 static SDValue
18512  SelectionDAG &DAG) {
18513  // If we have (sext (setcc A B)) and A and B are cheap to extend,
18514  // we can move the sext into the arguments and have the same result. For
18515  // example, if A and B are both loads, we can make those extending loads and
18516  // avoid an extra instruction. This pattern appears often in VLS code
18517  // generation where the inputs to the setcc have a different size to the
18518  // instruction that wants to use the result of the setcc.
18519  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
18520  N->getOperand(0)->getOpcode() == ISD::SETCC);
18521  const SDValue SetCC = N->getOperand(0);
18522 
18523  const SDValue CCOp0 = SetCC.getOperand(0);
18524  const SDValue CCOp1 = SetCC.getOperand(1);
18525  if (!CCOp0->getValueType(0).isInteger() ||
18526  !CCOp1->getValueType(0).isInteger())
18527  return SDValue();
18528 
18529  ISD::CondCode Code =
18530  cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
18531 
18532  ISD::NodeType ExtType =
18534 
18535  if (isCheapToExtend(SetCC.getOperand(0)) &&
18536  isCheapToExtend(SetCC.getOperand(1))) {
18537  const SDValue Ext1 =
18538  DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
18539  const SDValue Ext2 =
18540  DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
18541 
18542  return DAG.getSetCC(
18543  SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
18544  cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
18545  }
18546 
18547  return SDValue();
18548 }
18549 
18552  SelectionDAG &DAG) {
18553  // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
18554  // we can convert that DUP into another extract_high (of a bigger DUP), which
18555  // helps the backend to decide that an sabdl2 would be useful, saving a real
18556  // extract_high operation.
18557  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
18558  (N->getOperand(0).getOpcode() == ISD::ABDU ||
18559  N->getOperand(0).getOpcode() == ISD::ABDS)) {
18560  SDNode *ABDNode = N->getOperand(0).getNode();
18561  SDValue NewABD =
18563  if (!NewABD.getNode())
18564  return SDValue();
18565 
18566  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
18567  }
18568 
18569  if (N->getValueType(0).isFixedLengthVector() &&
18570  N->getOpcode() == ISD::SIGN_EXTEND &&
18571  N->getOperand(0)->getOpcode() == ISD::SETCC)
18572  return performSignExtendSetCCCombine(N, DCI, DAG);
18573 
18574  return SDValue();
18575 }
18576 
18578  SDValue SplatVal, unsigned NumVecElts) {
18579  assert(!St.isTruncatingStore() && "cannot split truncating vector store");
18580  Align OrigAlignment = St.getAlign();
18581  unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
18582 
18583  // Create scalar stores. This is at least as good as the code sequence for a
18584  // split unaligned store which is a dup.s, ext.b, and two stores.
18585  // Most of the time the three stores should be replaced by store pair
18586  // instructions (stp).
18587  SDLoc DL(&St);
18588  SDValue BasePtr = St.getBasePtr();
18589  uint64_t BaseOffset = 0;
18590 
18591  const MachinePointerInfo &PtrInfo = St.getPointerInfo();
18592  SDValue NewST1 =
18593  DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
18594  OrigAlignment, St.getMemOperand()->getFlags());
18595 
18596  // As this in ISel, we will not merge this add which may degrade results.
18597  if (BasePtr->getOpcode() == ISD::ADD &&
18598  isa<ConstantSDNode>(BasePtr->getOperand(1))) {
18599  BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
18600  BasePtr = BasePtr->getOperand(0);
18601  }
18602 
18603  unsigned Offset = EltOffset;
18604  while (--NumVecElts) {
18605  Align Alignment = commonAlignment(OrigAlignment, Offset);
18606  SDValue OffsetPtr =
18607  DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
18608  DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
18609  NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
18610  PtrInfo.getWithOffset(Offset), Alignment,
18611  St.getMemOperand()->getFlags());
18612  Offset += EltOffset;
18613  }
18614  return NewST1;
18615 }
18616 
18617 // Returns an SVE type that ContentTy can be trivially sign or zero extended
18618 // into.
18619 static MVT getSVEContainerType(EVT ContentTy) {
18620  assert(ContentTy.isSimple() && "No SVE containers for extended types");
18621 
18622  switch (ContentTy.getSimpleVT().SimpleTy) {
18623  default:
18624  llvm_unreachable("No known SVE container for this MVT type");
18625  case MVT::nxv2i8:
18626  case MVT::nxv2i16:
18627  case MVT::nxv2i32:
18628  case MVT::nxv2i64:
18629  case MVT::nxv2f32:
18630  case MVT::nxv2f64:
18631  return MVT::nxv2i64;
18632  case MVT::nxv4i8:
18633  case MVT::nxv4i16:
18634  case MVT::nxv4i32:
18635  case MVT::nxv4f32:
18636  return MVT::nxv4i32;
18637  case MVT::nxv8i8:
18638  case MVT::nxv8i16:
18639  case MVT::nxv8f16:
18640  case MVT::nxv8bf16:
18641  return MVT::nxv8i16;
18642  case MVT::nxv16i8:
18643  return MVT::nxv16i8;
18644  }
18645 }
18646 
18647 static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
18648  SDLoc DL(N);
18649  EVT VT = N->getValueType(0);
18650 
18652  return SDValue();
18653 
18654  EVT ContainerVT = VT;
18655  if (ContainerVT.isInteger())
18656  ContainerVT = getSVEContainerType(ContainerVT);
18657 
18658  SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
18659  SDValue Ops[] = { N->getOperand(0), // Chain
18660  N->getOperand(2), // Pg
18661  N->getOperand(3), // Base
18662  DAG.getValueType(VT) };
18663 
18664  SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
18665  SDValue LoadChain = SDValue(Load.getNode(), 1);
18666 
18667  if (ContainerVT.isInteger() && (VT != ContainerVT))
18668  Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
18669 
18670  return DAG.getMergeValues({ Load, LoadChain }, DL);
18671 }
18672 
18674  SDLoc DL(N);
18675  EVT VT = N->getValueType(0);
18676  EVT PtrTy = N->getOperand(3).getValueType();
18677 
18678  EVT LoadVT = VT;
18679  if (VT.isFloatingPoint())
18680  LoadVT = VT.changeTypeToInteger();
18681 
18682  auto *MINode = cast<MemIntrinsicSDNode>(N);
18683  SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
18684  SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
18685  MINode->getOperand(3), DAG.getUNDEF(PtrTy),
18686  MINode->getOperand(2), PassThru,
18687  MINode->getMemoryVT(), MINode->getMemOperand(),
18689 
18690  if (VT.isFloatingPoint()) {
18691  SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
18692  return DAG.getMergeValues(Ops, DL);
18693  }
18694 
18695  return L;
18696 }
18697 
18698 template <unsigned Opcode>
18700  static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
18701  Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
18702  "Unsupported opcode.");
18703  SDLoc DL(N);
18704  EVT VT = N->getValueType(0);
18705 
18706  EVT LoadVT = VT;
18707  if (VT.isFloatingPoint())
18708  LoadVT = VT.changeTypeToInteger();
18709 
18710  SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
18711  SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
18712  SDValue LoadChain = SDValue(Load.getNode(), 1);
18713 
18714  if (VT.isFloatingPoint())
18715  Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
18716 
18717  return DAG.getMergeValues({Load, LoadChain}, DL);
18718 }
18719 
18721  SDLoc DL(N);
18722  SDValue Data = N->getOperand(2);
18723  EVT DataVT = Data.getValueType();
18724  EVT HwSrcVt = getSVEContainerType(DataVT);
18725  SDValue InputVT = DAG.getValueType(DataVT);
18726 
18727  if (DataVT.isFloatingPoint())
18728  InputVT = DAG.getValueType(HwSrcVt);
18729 
18730  SDValue SrcNew;
18731  if (Data.getValueType().isFloatingPoint())
18732  SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
18733  else
18734  SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
18735 
18736  SDValue Ops[] = { N->getOperand(0), // Chain
18737  SrcNew,
18738  N->getOperand(4), // Base
18739  N->getOperand(3), // Pg
18740  InputVT
18741  };
18742 
18743  return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
18744 }
18745 
18747  SDLoc DL(N);
18748 
18749  SDValue Data = N->getOperand(2);
18750  EVT DataVT = Data.getValueType();
18751  EVT PtrTy = N->getOperand(4).getValueType();
18752 
18753  if (DataVT.isFloatingPoint())
18754  Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
18755 
18756  auto *MINode = cast<MemIntrinsicSDNode>(N);
18757  return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
18758  DAG.getUNDEF(PtrTy), MINode->getOperand(3),
18759  MINode->getMemoryVT(), MINode->getMemOperand(),
18760  ISD::UNINDEXED, false, false);
18761 }
18762 
18763 /// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
18764 /// load store optimizer pass will merge them to store pair stores. This should
18765 /// be better than a movi to create the vector zero followed by a vector store
18766 /// if the zero constant is not re-used, since one instructions and one register
18767 /// live range will be removed.
18768 ///
18769 /// For example, the final generated code should be:
18770 ///
18771 /// stp xzr, xzr, [x0]
18772 ///
18773 /// instead of:
18774 ///
18775 /// movi v0.2d, #0
18776 /// str q0, [x0]
18777 ///
18779  SDValue StVal = St.getValue();
18780  EVT VT = StVal.getValueType();
18781 
18782  // Avoid scalarizing zero splat stores for scalable vectors.
18783  if (VT.isScalableVector())
18784  return SDValue();
18785 
18786  // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
18787  // 2, 3 or 4 i32 elements.
18788  int NumVecElts = VT.getVectorNumElements();
18789  if (!(((NumVecElts == 2 || NumVecElts == 3) &&
18790  VT.getVectorElementType().getSizeInBits() == 64) ||
18791  ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
18792  VT.getVectorElementType().getSizeInBits() == 32)))
18793  return SDValue();
18794 
18795  if (StVal.getOpcode() != ISD::BUILD_VECTOR)
18796  return SDValue();
18797 
18798  // If the zero constant has more than one use then the vector store could be
18799  // better since the constant mov will be amortized and stp q instructions
18800  // should be able to be formed.
18801  if (!StVal.hasOneUse())
18802  return SDValue();
18803 
18804  // If the store is truncating then it's going down to i16 or smaller, which
18805  // means it can be implemented in a single store anyway.
18806  if (St.isTruncatingStore())
18807  return SDValue();
18808 
18809  // If the immediate offset of the address operand is too large for the stp
18810  // instruction, then bail out.
18811  if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
18812  int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
18813  if (Offset < -512 || Offset > 504)
18814  return SDValue();
18815  }
18816 
18817  for (int I = 0; I < NumVecElts; ++I) {
18818  SDValue EltVal = StVal.getOperand(I);
18819  if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
18820  return SDValue();
18821  }
18822 
18823  // Use a CopyFromReg WZR/XZR here to prevent
18824  // DAGCombiner::MergeConsecutiveStores from undoing this transformation.
18825  SDLoc DL(&St);
18826  unsigned ZeroReg;
18827  EVT ZeroVT;
18828  if (VT.getVectorElementType().getSizeInBits() == 32) {
18829  ZeroReg = AArch64::WZR;
18830  ZeroVT = MVT::i32;
18831  } else {
18832  ZeroReg = AArch64::XZR;
18833  ZeroVT = MVT::i64;
18834  }
18835  SDValue SplatVal =
18836  DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
18837  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
18838 }
18839 
18840 /// Replace a splat of a scalar to a vector store by scalar stores of the scalar
18841 /// value. The load store optimizer pass will merge them to store pair stores.
18842 /// This has better performance than a splat of the scalar followed by a split
18843 /// vector store. Even if the stores are not merged it is four stores vs a dup,
18844 /// followed by an ext.b and two stores.
18846  SDValue StVal = St.getValue();
18847  EVT VT = StVal.getValueType();
18848 
18849  // Don't replace floating point stores, they possibly won't be transformed to
18850  // stp because of the store pair suppress pass.
18851  if (VT.isFloatingPoint())
18852  return SDValue();
18853 
18854  // We can express a splat as store pair(s) for 2 or 4 elements.
18855  unsigned NumVecElts = VT.getVectorNumElements();
18856  if (NumVecElts != 4 && NumVecElts != 2)
18857  return SDValue();
18858 
18859  // If the store is truncating then it's going down to i16 or smaller, which
18860  // means it can be implemented in a single store anyway.
18861  if (St.isTruncatingStore())
18862  return SDValue();
18863 
18864  // Check that this is a splat.
18865  // Make sure that each of the relevant vector element locations are inserted
18866  // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
18867  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
18868  SDValue SplatVal;
18869  for (unsigned I = 0; I < NumVecElts; ++I) {
18870  // Check for insert vector elements.
18871  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
18872  return SDValue();
18873 
18874  // Check that same value is inserted at each vector element.
18875  if (I == 0)
18876  SplatVal = StVal.getOperand(1);
18877  else if (StVal.getOperand(1) != SplatVal)
18878  return SDValue();
18879 
18880  // Check insert element index.
18881  ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
18882  if (!CIndex)
18883  return SDValue();
18884  uint64_t IndexVal = CIndex->getZExtValue();
18885  if (IndexVal >= NumVecElts)
18886  return SDValue();
18887  IndexNotInserted.reset(IndexVal);
18888 
18889  StVal = StVal.getOperand(0);
18890  }
18891  // Check that all vector element locations were inserted to.
18892  if (IndexNotInserted.any())
18893  return SDValue();
18894 
18895  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
18896 }
18897 
18899  SelectionDAG &DAG,
18900  const AArch64Subtarget *Subtarget) {
18901 
18902  StoreSDNode *S = cast<StoreSDNode>(N);
18903  if (S->isVolatile() || S->isIndexed())
18904  return SDValue();
18905 
18906  SDValue StVal = S->getValue();
18907  EVT VT = StVal.getValueType();
18908 
18909  if (!VT.isFixedLengthVector())
18910  return SDValue();
18911 
18912  // If we get a splat of zeros, convert this vector store to a store of
18913  // scalars. They will be merged into store pairs of xzr thereby removing one
18914  // instruction and one register.
18915  if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
18916  return ReplacedZeroSplat;
18917 
18918  // FIXME: The logic for deciding if an unaligned store should be split should
18919  // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
18920  // a call to that function here.
18921 
18922  if (!Subtarget->isMisaligned128StoreSlow())
18923  return SDValue();
18924 
18925  // Don't split at -Oz.
18927  return SDValue();
18928 
18929  // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
18930  // those up regresses performance on micro-benchmarks and olden/bh.
18931  if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
18932  return SDValue();
18933 
18934  // Split unaligned 16B stores. They are terrible for performance.
18935  // Don't split stores with alignment of 1 or 2. Code that uses clang vector
18936  // extensions can use this to mark that it does not want splitting to happen
18937  // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
18938  // eliminating alignment hazards is only 1 in 8 for alignment of 2.
18939  if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) ||
18940  S->getAlign() <= Align(2))
18941  return SDValue();
18942 
18943  // If we get a splat of a scalar convert this vector store to a store of
18944  // scalars. They will be merged into store pairs thereby removing two
18945  // instructions.
18946  if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
18947  return ReplacedSplat;
18948 
18949  SDLoc DL(S);
18950 
18951  // Split VT into two.
18952  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18953  unsigned NumElts = HalfVT.getVectorNumElements();
18954  SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
18955  DAG.getConstant(0, DL, MVT::i64));
18956  SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
18957  DAG.getConstant(NumElts, DL, MVT::i64));
18958  SDValue BasePtr = S->getBasePtr();
18959  SDValue NewST1 =
18960  DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
18961  S->getAlign(), S->getMemOperand()->getFlags());
18962  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
18963  DAG.getConstant(8, DL, MVT::i64));
18964  return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
18965  S->getPointerInfo(), S->getAlign(),
18966  S->getMemOperand()->getFlags());
18967 }
18968 
18970  assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexepected Opcode!");
18971 
18972  // splice(pg, op1, undef) -> op1
18973  if (N->getOperand(2).isUndef())
18974  return N->getOperand(1);
18975 
18976  return SDValue();
18977 }
18978 
18980  const AArch64Subtarget *Subtarget) {
18981  assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
18982  N->getOpcode() == AArch64ISD::UUNPKLO) &&
18983  "Unexpected Opcode!");
18984 
18985  // uunpklo/hi undef -> undef
18986  if (N->getOperand(0).isUndef())
18987  return DAG.getUNDEF(N->getValueType(0));
18988 
18989  // If this is a masked load followed by an UUNPKLO, fold this into a masked
18990  // extending load. We can do this even if this is already a masked
18991  // {z,}extload.
18992  if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
18993  N->getOpcode() == AArch64ISD::UUNPKLO) {
18994  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0));
18995  SDValue Mask = MLD->getMask();
18996  SDLoc DL(N);
18997 
18998  if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD &&
18999  SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
19000  (MLD->getPassThru()->isUndef() ||
19001  isZerosVector(MLD->getPassThru().getNode()))) {
19002  unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
19003  unsigned PgPattern = Mask->getConstantOperandVal(0);
19004  EVT VT = N->getValueType(0);
19005 
19006  // Ensure we can double the size of the predicate pattern
19007  unsigned NumElts = getNumElementsFromSVEPredPattern(PgPattern);
19008  if (NumElts &&
19009  NumElts * VT.getVectorElementType().getSizeInBits() <= MinSVESize) {
19010  Mask =
19011  getPTrue(DAG, DL, VT.changeVectorElementType(MVT::i1), PgPattern);
19012  SDValue PassThru = DAG.getConstant(0, DL, VT);
19013  SDValue NewLoad = DAG.getMaskedLoad(
19014  VT, DL, MLD->getChain(), MLD->getBasePtr(), MLD->getOffset(), Mask,
19015  PassThru, MLD->getMemoryVT(), MLD->getMemOperand(),
19017 
19018  DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), NewLoad.getValue(1));
19019 
19020  return NewLoad;
19021  }
19022  }
19023  }
19024 
19025  return SDValue();
19026 }
19027 
19029  SDLoc DL(N);
19030  SDValue Op0 = N->getOperand(0);
19031  SDValue Op1 = N->getOperand(1);
19032  EVT ResVT = N->getValueType(0);
19033 
19034  // uzp1(x, undef) -> concat(truncate(x), undef)
19035  if (Op1.getOpcode() == ISD::UNDEF) {
19036  EVT BCVT = MVT::Other, HalfVT = MVT::Other;
19037  switch (ResVT.getSimpleVT().SimpleTy) {
19038  default:
19039  break;
19040  case MVT::v16i8:
19041  BCVT = MVT::v8i16;
19042  HalfVT = MVT::v8i8;
19043  break;
19044  case MVT::v8i16:
19045  BCVT = MVT::v4i32;
19046  HalfVT = MVT::v4i16;
19047  break;
19048  case MVT::v4i32:
19049  BCVT = MVT::v2i64;
19050  HalfVT = MVT::v2i32;
19051  break;
19052  }
19053  if (BCVT != MVT::Other) {
19054  SDValue BC = DAG.getBitcast(BCVT, Op0);
19055  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, BC);
19056  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Trunc,
19057  DAG.getUNDEF(HalfVT));
19058  }
19059  }
19060 
19061  // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
19062  if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
19063  if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
19064  SDValue X = Op0.getOperand(0).getOperand(0);
19065  return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
19066  }
19067  }
19068 
19069  // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
19070  if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
19071  if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
19072  SDValue Z = Op1.getOperand(0).getOperand(1);
19073  return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
19074  }
19075  }
19076 
19077  // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
19078  // Only implemented on little-endian subtargets.
19079  bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
19080 
19081  // This optimization only works on little endian.
19082  if (!IsLittleEndian)
19083  return SDValue();
19084 
19085  if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8)
19086  return SDValue();
19087 
19088  auto getSourceOp = [](SDValue Operand) -> SDValue {
19089  const unsigned Opcode = Operand.getOpcode();
19090  if (Opcode == ISD::TRUNCATE)
19091  return Operand->getOperand(0);
19092  if (Opcode == ISD::BITCAST &&
19093  Operand->getOperand(0).getOpcode() == ISD::TRUNCATE)
19094  return Operand->getOperand(0)->getOperand(0);
19095  return SDValue();
19096  };
19097 
19098  SDValue SourceOp0 = getSourceOp(Op0);
19099  SDValue SourceOp1 = getSourceOp(Op1);
19100 
19101  if (!SourceOp0 || !SourceOp1)
19102  return SDValue();
19103 
19104  if (SourceOp0.getValueType() != SourceOp1.getValueType() ||
19105  !SourceOp0.getValueType().isSimple())
19106  return SDValue();
19107 
19108  EVT ResultTy;
19109 
19110  switch (SourceOp0.getSimpleValueType().SimpleTy) {
19111  case MVT::v2i64:
19112  ResultTy = MVT::v4i32;
19113  break;
19114  case MVT::v4i32:
19115  ResultTy = MVT::v8i16;
19116  break;
19117  case MVT::v8i16:
19118  ResultTy = MVT::v16i8;
19119  break;
19120  default:
19121  return SDValue();
19122  }
19123 
19124  SDValue UzpOp0 = DAG.getNode(ISD::BITCAST, DL, ResultTy, SourceOp0);
19125  SDValue UzpOp1 = DAG.getNode(ISD::BITCAST, DL, ResultTy, SourceOp1);
19126  SDValue UzpResult =
19127  DAG.getNode(AArch64ISD::UZP1, DL, UzpOp0.getValueType(), UzpOp0, UzpOp1);
19128 
19129  EVT BitcastResultTy;
19130 
19131  switch (ResVT.getSimpleVT().SimpleTy) {
19132  case MVT::v2i32:
19133  BitcastResultTy = MVT::v2i64;
19134  break;
19135  case MVT::v4i16:
19136  BitcastResultTy = MVT::v4i32;
19137  break;
19138  case MVT::v8i8:
19139  BitcastResultTy = MVT::v8i16;
19140  break;
19141  default:
19142  llvm_unreachable("Should be one of {v2i32, v4i16, v8i8}");
19143  }
19144 
19145  return DAG.getNode(ISD::TRUNCATE, DL, ResVT,
19146  DAG.getNode(ISD::BITCAST, DL, BitcastResultTy, UzpResult));
19147 }
19148 
19150  unsigned Opc = N->getOpcode();
19151 
19152  assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
19154  (Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
19156  "Invalid opcode.");
19157 
19158  const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
19160  const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
19162  const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
19166 
19167  SDLoc DL(N);
19168  SDValue Chain = N->getOperand(0);
19169  SDValue Pg = N->getOperand(1);
19170  SDValue Base = N->getOperand(2);
19171  SDValue Offset = N->getOperand(3);
19172  SDValue Ty = N->getOperand(4);
19173 
19174  EVT ResVT = N->getValueType(0);
19175 
19176  const auto OffsetOpc = Offset.getOpcode();
19177  const bool OffsetIsZExt =
19179  const bool OffsetIsSExt =
19181 
19182  // Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
19183  if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
19184  SDValue ExtPg = Offset.getOperand(0);
19185  VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
19186  EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
19187 
19188  // If the predicate for the sign- or zero-extended offset is the
19189  // same as the predicate used for this load and the sign-/zero-extension
19190  // was from a 32-bits...
19191  if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
19192  SDValue UnextendedOffset = Offset.getOperand(1);
19193 
19194  unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
19195  if (Signed)
19196  NewOpc = getSignExtendedGatherOpcode(NewOpc);
19197 
19198  return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
19199  {Chain, Pg, Base, UnextendedOffset, Ty});
19200  }
19201  }
19202 
19203  return SDValue();
19204 }
19205 
19206 /// Optimize a vector shift instruction and its operand if shifted out
19207 /// bits are not used.
19209  const AArch64TargetLowering &TLI,
19211  assert(N->getOpcode() == AArch64ISD::VASHR ||
19212  N->getOpcode() == AArch64ISD::VLSHR);
19213 
19214  SDValue Op = N->getOperand(0);
19215  unsigned OpScalarSize = Op.getScalarValueSizeInBits();
19216 
19217  unsigned ShiftImm = N->getConstantOperandVal(1);
19218  assert(OpScalarSize > ShiftImm && "Invalid shift imm");
19219 
19220  APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
19221  APInt DemandedMask = ~ShiftedOutBits;
19222 
19223  if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
19224  return SDValue(N, 0);
19225 
19226  return SDValue();
19227 }
19228 
19230  // sunpklo(sext(pred)) -> sext(extract_low_half(pred))
19231  // This transform works in partnership with performSetCCPunpkCombine to
19232  // remove unnecessary transfer of predicates into standard registers and back
19233  if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND &&
19234  N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
19235  MVT::i1) {
19236  SDValue CC = N->getOperand(0)->getOperand(0);
19237  auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
19238  SDValue Unpk = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, CC,
19239  DAG.getVectorIdxConstant(0, SDLoc(N)));
19240  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk);
19241  }
19242 
19243  return SDValue();
19244 }
19245 
19246 /// Target-specific DAG combine function for post-increment LD1 (lane) and
19247 /// post-increment LD1R.
// Folds a vector insert/dup fed by a scalar LOAD, together with a matching
// ADD on the load's address, into a single AArch64ISD::LD1LANEpost /
// LD1DUPpost node that also produces the written-back address.
// NOTE(review): the rendered listing is missing lines 19248-19249, which
// held the function signature; only the trailing IsLaneOp parameter is
// visible below.
19250  bool IsLaneOp) {
// Post-increment forms are only formed after operation legalization.
19251  if (DCI.isBeforeLegalizeOps())
19252  return SDValue();
19253 
19254  SelectionDAG &DAG = DCI.DAG;
19255  EVT VT = N->getValueType(0);
19256 
// Only 64-bit and 128-bit vectors are handled.
19257  if (!VT.is128BitVector() && !VT.is64BitVector())
19258  return SDValue();
19259 
// For the lane op the loaded scalar is operand 1 (operand 0 is the vector
// being inserted into); for the dup op the load is operand 0.
19260  unsigned LoadIdx = IsLaneOp ? 1 : 0;
19261  SDNode *LD = N->getOperand(LoadIdx).getNode();
19262  // If it is not LOAD, can not do such combine.
19263  if (LD->getOpcode() != ISD::LOAD)
19264  return SDValue();
19265 
19266  // The vector lane must be a constant in the LD1LANE opcode.
19267  SDValue Lane;
19268  if (IsLaneOp) {
19269  Lane = N->getOperand(2);
19270  auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
// Reject non-constant or out-of-range lane indices.
19271  if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
19272  return SDValue();
19273  }
19274 
19275  LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
19276  EVT MemVT = LoadSDN->getMemoryVT();
19277  // Check if memory operand is the same type as the vector element.
19278  if (MemVT != VT.getVectorElementType())
19279  return SDValue();
19280 
19281  // Check if there are other uses. If so, do not combine as it will introduce
19282  // an extra load.
19283  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
19284  ++UI) {
19285  if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
19286  continue;
19287  if (*UI != N)
19288  return SDValue();
19289  }
19290 
19291  SDValue Addr = LD->getOperand(1);
19292  SDValue Vector = N->getOperand(0);
19293  // Search for a use of the address operand that is an increment.
19294  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
19295  Addr.getNode()->use_end(); UI != UE; ++UI) {
19296  SDNode *User = *UI;
// Only a plain ADD of this exact address value can become the write-back.
19297  if (User->getOpcode() != ISD::ADD
19298  || UI.getUse().getResNo() != Addr.getResNo())
19299  continue;
19300 
19301  // If the increment is a constant, it must match the memory ref size.
19302  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
19303  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
19304  uint32_t IncVal = CInc->getZExtValue();
19305  unsigned NumBytes = VT.getScalarSizeInBits() / 8;
19306  if (IncVal != NumBytes)
19307  continue;
// XZR as the increment operand encodes the post-index-by-immediate form.
19308  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
19309  }
19310 
19311  // To avoid cycle construction make sure that neither the load nor the add
19312  // are predecessors to each other or the Vector.
// NOTE(review): lines 19313-19314 (the declarations of the Visited set and
// Worklist vector used below) are missing from this rendering.
19315  Visited.insert(Addr.getNode());
19316  Worklist.push_back(User);
19317  Worklist.push_back(LD);
19318  Worklist.push_back(Vector.getNode());
19319  if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
19320  SDNode::hasPredecessorHelper(User, Visited, Worklist))
19321  continue;
19322 
// NOTE(review): line 19323 (the declaration of the Ops operand vector) is
// missing from this rendering.
19324  Ops.push_back(LD->getOperand(0)); // Chain
19325  if (IsLaneOp) {
19326  Ops.push_back(Vector); // The vector to be inserted
19327  Ops.push_back(Lane); // The lane to be inserted in the vector
19328  }
19329  Ops.push_back(Addr);
19330  Ops.push_back(Inc);
19331 
// Results: updated vector, written-back address (i64), and the chain.
19332  EVT Tys[3] = { VT, MVT::i64, MVT::Other };
19333  SDVTList SDTys = DAG.getVTList(Tys);
19334  unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
19335  SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
19336  MemVT,
19337  LoadSDN->getMemOperand());
19338 
19339  // Update the uses.
19340  SDValue NewResults[] = {
19341  SDValue(LD, 0), // The result of load
19342  SDValue(UpdN.getNode(), 2) // Chain
19343  };
19344  DCI.CombineTo(LD, NewResults);
19345  DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
19346  DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
19347 
// At most one post-increment form per node; stop scanning address users.
19348  break;
19349  }
19350  return SDValue();
19351 }
19352 
19353 /// Simplify ``Addr`` given that the top byte of it is ignored by HW during
19354 /// address translation.
// Returns true when SimplifyDemandedBits rewrote Addr using only its low
// 56 bits (AArch64 TBI: address bits 63:56 are ignored by translation).
// NOTE(review): lines 19355-19356 (the function signature) and 19360 (the
// TargetLoweringOpt TLO declaration, whose constructor takes the flag on
// line 19361) are missing from this rendering.
19357  SelectionDAG &DAG) {
// Demand only bits 0-55; the top byte may take any value.
19358  APInt DemandedMask = APInt::getLowBitsSet(64, 56);
19359  KnownBits Known;
19361  !DCI.isBeforeLegalizeOps());
19362  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19363  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
// Commit the replacement nodes SimplifyDemandedBits queued in TLO.
19364  DCI.CommitTargetLoweringOpt(TLO);
19365  return true;
19366  }
19367  return false;
19368 }
19369 
// Fold a truncating store of a {zero,sign,any}-extended value into a plain
// store of the unextended value when the store's memory type equals the
// pre-extension type: truncstore(ext(x)) -> store(x).
// NOTE(review): line 19370 (the function signature) is missing from this
// rendering; the visible body takes a SelectionDAG `DAG` and SDNode `N`.
19371  assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
19372  "Expected STORE dag node in input!");
19373 
// Only plain (non-masked) StoreSDNodes are handled; an MSTORE falls through
// to the SDValue() return at the bottom.
19374  if (auto Store = dyn_cast<StoreSDNode>(N)) {
19375  if (!Store->isTruncatingStore() || Store->isIndexed())
19376  return SDValue();
19377  SDValue Ext = Store->getValue();
19378  auto ExtOpCode = Ext.getOpcode();
19379  if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
19380  ExtOpCode != ISD::ANY_EXTEND)
19381  return SDValue();
19382  SDValue Orig = Ext->getOperand(0);
// The truncation must exactly undo the extension: the stored memory type
// has to match the original (pre-extension) value type.
19383  if (Store->getMemoryVT() != Orig.getValueType())
19384  return SDValue();
19385  return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
19386  Store->getBasePtr(), Store->getMemOperand())
19387  }
19388 
19389  return SDValue();
19390 }
19391 
19392 // Perform TBI simplification if supported by the target and try to break up
19393 // nontemporal loads larger than 256-bits loads for odd types so LDNPQ 256-bit
19394 // load instructions can be selected.
// NOTE(review): lines 19395-19396 (the function signature) are missing from
// this rendering.
19397  SelectionDAG &DAG,
19398  const AArch64Subtarget *Subtarget) {
// Operand 1 of a LOAD is the address; with TBI its top byte is ignorable.
19399  if (Subtarget->supportsAddressTopByteIgnored())
19400  performTBISimplification(N->getOperand(1), DCI, DAG);
19401 
19402  LoadSDNode *LD = cast<LoadSDNode>(N);
19403  EVT MemVT = LD->getMemoryVT();
// The split below applies only to non-volatile nontemporal little-endian
// loads; returning SDValue(N, 0) keeps the (possibly TBI-simplified) node.
19404  if (LD->isVolatile() || !LD->isNonTemporal() || !Subtarget->isLittleEndian())
19405  return SDValue(N, 0);
19406 
// Skip scalable vectors, loads of 256 bits or less, exact multiples of 256
// bits, and element widths that do not divide 256 evenly.
19407  if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 ||
19408  MemVT.getSizeInBits() % 256 == 0 ||
19409  256 % MemVT.getScalarSizeInBits() != 0)
19410  return SDValue(N, 0);
19411 
19412  SDLoc DL(LD);
19413  SDValue Chain = LD->getChain();
19414  SDValue BasePtr = LD->getBasePtr();
19415  SDNodeFlags Flags = LD->getFlags();
// NOTE(review): line 19416 (the SmallVector declaration for LoadOps, the
// value results collected below) is missing from this rendering.
19417  SmallVector<SDValue, 4> LoadOpsChain;
19418  // Replace any non temporal load over 256-bit with a series of 256 bit loads
19419  // and a scalar/vector load less than 256. This way we can utilize 256-bit
19420  // loads and reduce the amount of load instructions generated.
19421  MVT NewVT =
// NOTE(review): line 19422 (the first argument to MVT::getVectorVT — the
// element MVT) is missing from this rendering.
19423  256 / MemVT.getVectorElementType().getSizeInBits());
19424  unsigned Num256Loads = MemVT.getSizeInBits() / 256;
19425  // Create all 256-bit loads starting from offset 0 and up to Num256Loads-1*32.
19426  for (unsigned I = 0; I < Num256Loads; I++) {
// 32 bytes == 256 bits per chunk.
19427  unsigned PtrOffset = I * 32;
19428  SDValue NewPtr = DAG.getMemBasePlusOffset(
19429  BasePtr, TypeSize::Fixed(PtrOffset), DL, Flags);
// Each piece's alignment is the original alignment adjusted by its offset.
19430  Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
19431  SDValue NewLoad = DAG.getLoad(
19432  NewVT, DL, Chain, NewPtr, LD->getPointerInfo().getWithOffset(PtrOffset),
19433  NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo());
19434  LoadOps.push_back(NewLoad);
// Result 1 of each load is its chain; all chains are TokenFactor'd below.
19435  LoadOpsChain.push_back(SDValue(cast<SDNode>(NewLoad), 1));
19436  }
19437 
19438  // Process remaining bits of the load operation.
19439  // This is done by creating an UNDEF vector to match the size of the
19440  // 256-bit loads and inserting the remaining load to it. We extract the
19441  // original load type at the end using EXTRACT_SUBVECTOR instruction.
19442  unsigned BitsRemaining = MemVT.getSizeInBits() % 256;
19443  unsigned PtrOffset = (MemVT.getSizeInBits() - BitsRemaining) / 8;
19444  MVT RemainingVT = MVT::getVectorVT(
// NOTE(review): line 19445 (the element MVT argument to getVectorVT) is
// missing from this rendering.
19446  BitsRemaining / MemVT.getVectorElementType().getSizeInBits());
19447  SDValue NewPtr =
19448  DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(PtrOffset), DL, Flags);
19449  Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset);
19450  SDValue RemainingLoad =
19451  DAG.getLoad(RemainingVT, DL, Chain, NewPtr,
19452  LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
19453  LD->getMemOperand()->getFlags(), LD->getAAInfo());
// Widen the tail load to the 256-bit chunk type so all concat inputs match.
19454  SDValue UndefVector = DAG.getUNDEF(NewVT);
19455  SDValue InsertIdx = DAG.getVectorIdxConstant(0, DL);
19456  SDValue ExtendedReminingLoad =
19457  DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT,
19458  {UndefVector, RemainingLoad, InsertIdx});
19459  LoadOps.push_back(ExtendedReminingLoad);
19460  LoadOpsChain.push_back(SDValue(cast<SDNode>(RemainingLoad), 1));
19461  EVT ConcatVT =
19462  EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
19463  LoadOps.size() * NewVT.getVectorNumElements());
19464  SDValue ConcatVectors =
19465  DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, LoadOps);
19466  // Extract the original vector type size.
19467  SDValue ExtractSubVector =
19468  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MemVT,
19469  {ConcatVectors, DAG.getVectorIdxConstant(0, DL)});
// NOTE(review): line 19470 (the `SDValue TokenFactor =` declaration bound
// to the node built on the next line) is missing from this rendering.
19471  DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoadOpsChain);
19472  return DAG.getMergeValues({ExtractSubVector, TokenFactor}, DL);
19473 }
19474 
// DAG combine for plain stores: FP_ROUND+store -> SVE truncating store,
// store splitting, TBI address simplification, and store-of-extend folding.
// NOTE(review): lines 19475-19476 (the function signature) are missing from
// this rendering.
19477  SelectionDAG &DAG,
19478  const AArch64Subtarget *Subtarget) {
19479  StoreSDNode *ST = cast<StoreSDNode>(N);
19480  SDValue Chain = ST->getChain();
19481  SDValue Value = ST->getValue();
19482  SDValue Ptr = ST->getBasePtr();
19483  EVT ValueVT = Value.getValueType();
19484 
// Only f32/f64 source elements are eligible for the FP trunc-store fold.
19485  auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
19486  EVT EltVT = VT.getVectorElementType();
19487  return EltVT == MVT::f32 || EltVT == MVT::f64;
19488  };
19489 
19490  // If this is an FP_ROUND followed by a store, fold this into a truncating
19491  // store. We can do this even if this is already a truncstore.
19492  // We purposefully don't care about legality of the nodes here as we know
19493  // they can be split down into something legal.
19494  if (DCI.isBeforeLegalizeOps() && Value.getOpcode() == ISD::FP_ROUND &&
19495  Value.getNode()->hasOneUse() && ST->isUnindexed() &&
19496  Subtarget->useSVEForFixedLengthVectors() &&
19497  ValueVT.isFixedLengthVector() &&
19498  ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
19499  hasValidElementTypeForFPTruncStore(Value.getOperand(0).getValueType()))
19500  return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
19501  ST->getMemoryVT(), ST->getMemOperand());
19502 
19503  if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
19504  return Split;
19505 
// Operand 2 of a STORE node is the address.
19506  if (Subtarget->supportsAddressTopByteIgnored() &&
19507  performTBISimplification(N->getOperand(2), DCI, DAG))
19508  return SDValue(N, 0);
19509 
19510  if (SDValue Store = foldTruncStoreOfExt(DAG, N))
19511  return Store;
19512 
19513  return SDValue();
19514 }
19515 
// Fold a UZP1 (+ inner BITCAST) feeding a masked store into a masked
// truncating store, doubling the PTRUE predicate's element width to match.
// NOTE(review): lines 19516-19517 (the function signature) are missing from
// this rendering.
19518  SelectionDAG &DAG,
19519  const AArch64Subtarget *Subtarget) {
19520  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
19521  SDValue Value = MST->getValue();
19522  SDValue Mask = MST->getMask();
19523  SDLoc DL(N);
19524 
19525  // If this is a UZP1 followed by a masked store, fold this into a masked
19526  // truncating store. We can do this even if this is already a masked
19527  // truncstore.
19528  if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() &&
19529  MST->isUnindexed() && Mask->getOpcode() == AArch64ISD::PTRUE &&
19530  Value.getValueType().isInteger()) {
19531  Value = Value.getOperand(0);
19532  if (Value.getOpcode() == ISD::BITCAST) {
// The bitcast source must have elements exactly twice as wide (same total
// size, half the lane count) for the fold to be a pure truncation.
19533  EVT HalfVT =
19534  Value.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
19535  EVT InVT = Value.getOperand(0).getValueType();
19536 
19537  if (HalfVT.widenIntegerVectorElementType(*DAG.getContext()) == InVT) {
19538  unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
19539  unsigned PgPattern = Mask->getConstantOperandVal(0);
19540 
19541  // Ensure we can double the size of the predicate pattern
19542  unsigned NumElts = getNumElementsFromSVEPredPattern(PgPattern);
19543  if (NumElts && NumElts * InVT.getVectorElementType().getSizeInBits() <=
19544  MinSVESize) {
// NOTE(review): line 19545 (which rebuilds Mask as a predicate of the
// wider element type using the same PgPattern) is missing from this
// rendering; line 19546 below is its trailing argument.
19546  PgPattern);
19547  return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0),
19548  MST->getBasePtr(), MST->getOffset(), Mask,
19549  MST->getMemoryVT(), MST->getMemOperand(),
19550  MST->getAddressingMode(),
19551  /*IsTruncating=*/true);
19552  }
19553  }
19554  }
19555  }
19556 
19557  return SDValue();
19558 }
19559 
19560 /// \return true if part of the index was folded into the Base.
// Moves a splatted constant offset out of a gather/scatter's vector index
// (optionally underneath a splatted shift) into the scalar base pointer,
// scaling the offset by `Scale` bytes per element.
19561 static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale,
19562  SDLoc DL, SelectionDAG &DAG) {
19563  // This function assumes a vector of i64 indices.
19564  EVT IndexVT = Index.getValueType();
19565  if (!IndexVT.isVector() || IndexVT.getVectorElementType() != MVT::i64)
19566  return false;
19567 
19568  // Simplify:
19569  // BasePtr = Ptr
19570  // Index = X + splat(Offset)
19571  // ->
19572  // BasePtr = Ptr + Offset * scale.
19573  // Index = X
19574  if (Index.getOpcode() == ISD::ADD) {
19575  if (auto Offset = DAG.getSplatValue(Index.getOperand(1))) {
// The splatted offset contributes Offset * Scale bytes to the base.
19576  Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
19577  BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
19578  Index = Index.getOperand(0);
19579  return true;
19580  }
19581  }
19582 
19583  // Simplify:
19584  // BasePtr = Ptr
19585  // Index = (X + splat(Offset)) << splat(Shift)
19586  // ->
19587  // BasePtr = Ptr + (Offset << Shift) * scale)
19588  // Index = X << splat(shift)
19589  if (Index.getOpcode() == ISD::SHL &&
19590  Index.getOperand(0).getOpcode() == ISD::ADD) {
19591  SDValue Add = Index.getOperand(0);
19592  SDValue ShiftOp = Index.getOperand(1);
19593  SDValue OffsetOp = Add.getOperand(1);
19594  if (auto Shift = DAG.getSplatValue(ShiftOp))
19595  if (auto Offset = DAG.getSplatValue(OffsetOp)) {
// NOTE(review): line 19596 (which, per the comment above, applies the
// shift to Offset before the multiply below) is missing from this
// rendering.
19597  Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
19598  BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
// Keep the shift on the remaining variable part of the index.
19599  Index = DAG.getNode(ISD::SHL, DL, Index.getValueType(),
19600  Add.getOperand(0), ShiftOp);
19601  return true;
19602  }
19603  }
19604 
19605  return false;
19606 }
19607 
19608 // Analyse the specified address returning true if a more optimal addressing
19609 // mode is available. When returning true all parameters are updated to reflect
19610 // their recommended values.
// NOTE(review): line 19611 (the first line of the function signature) is
// missing from this rendering; the remaining parameters are visible below.
19612  SDValue &BasePtr, SDValue &Index,
19613  SelectionDAG &DAG) {
19614  // Try to iteratively fold parts of the index into the base pointer to
19615  // simplify the index as much as possible.
19616  bool Changed = false;
19617  while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
19618  Changed = true;
19619 
19620  // Only consider element types that are pointer sized as smaller types can
19621  // be easily promoted.
19622  EVT IndexVT = Index.getValueType();
19623  if (IndexVT.getVectorElementType() != MVT::i64 || IndexVT == MVT::nxv2i64)
19624  return Changed;
19625 
19626  // Can indices be trivially shrunk?
19627  EVT DataVT = N->getOperand(1).getValueType();
19628  // Don't attempt to shrink the index for fixed vectors of 64 bit data since it
19629  // will later be re-extended to 64 bits in legalization
19630  if (DataVT.isFixedLengthVector() && DataVT.getScalarSizeInBits() == 64)
19631  return Changed;
// If every index element provably fits in 32 bits, truncate the whole
// index vector to i32 elements.
19632  if (ISD::isVectorShrinkable(Index.getNode(), 32, N->isIndexSigned())) {
19633  EVT NewIndexVT = IndexVT.changeVectorElementType(MVT::i32);
19634  Index = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NewIndexVT, Index);
19635  return true;
19636  }
19637 
19638  // Match:
19639  // Index = step(const)
19640  int64_t Stride = 0;
19641  if (Index.getOpcode() == ISD::STEP_VECTOR) {
19642  Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
19643  }
19644  // Match:
19645  // Index = step(const) << shift(const)
19646  else if (Index.getOpcode() == ISD::SHL &&
19647  Index.getOperand(0).getOpcode() == ISD::STEP_VECTOR) {
19648  SDValue RHS = Index.getOperand(1);
19649  if (auto *Shift =
19650  dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(RHS))) {
19651  int64_t Step = (int64_t)Index.getOperand(0).getConstantOperandVal(1);
19652  Stride = Step << Shift->getZExtValue();
19653  }
19654  }
19655 
19656  // Return early because no supported pattern is found.
19657  if (Stride == 0)
19658  return Changed;
19659 
// The stride itself must be representable as an i32.
19660  if (Stride < std::numeric_limits<int32_t>::min() ||
// NOTE(review): line 19661 (the matching upper-bound comparison against
// int32_t max) is missing from this rendering.
19662  return Changed;
19663 
// The farthest reachable offset (last element at maximum vscale) must also
// stay within i32 range.
19664  const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
19665  unsigned MaxVScale =
// NOTE(review): line 19666 (the initializer of MaxVScale, derived from the
// subtarget's maximum SVE vector width) is missing from this rendering.
19667  int64_t LastElementOffset =
19668  IndexVT.getVectorMinNumElements() * Stride * MaxVScale;
19669 
19670  if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
19671  LastElementOffset > std::numeric_limits<int32_t>::max())
19672  return Changed;
19673 
19674  EVT NewIndexVT = IndexVT.changeVectorElementType(MVT::i32);
19675  // Stride does not scale explicitly by 'Scale', because it happens in
19676  // the gather/scatter addressing mode.
19677  Index = DAG.getStepVector(SDLoc(N), NewIndexVT, APInt(32, Stride));
19678  return true;
19679 }
19680 
// Optimise the index/base addressing of masked gathers and scatters before
// legalisation, then rebuild the node with the updated BasePtr/Index.
// NOTE(review): lines 19681-19682 (the function signature) are missing from
// this rendering.
19683  MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N);
19684  assert(MGS && "Can only combine gather load or scatter store nodes");
19685 
19686  if (!DCI.isBeforeLegalize())
19687  return SDValue();
19688 
19689  SDLoc DL(MGS);
19690  SDValue Chain = MGS->getChain();
19691  SDValue Scale = MGS->getScale();
19692  SDValue Index = MGS->getIndex();
19693  SDValue Mask = MGS->getMask();
19694  SDValue BasePtr = MGS->getBasePtr();
19695  ISD::MemIndexType IndexType = MGS->getIndexType();
19696 
// findMoreOptimalIndexType may rewrite BasePtr and Index in place.
19697  if (!findMoreOptimalIndexType(MGS, BasePtr, Index, DAG))
19698  return SDValue();
19699 
19700  // Here we catch such cases early and change MGATHER's IndexType to allow
19701  // the use of an Index that's more legalisation friendly.
19702  if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
19703  SDValue PassThru = MGT->getPassThru();
19704  SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
19705  return DAG.getMaskedGather(
19706  DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
19707  Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
19708  }
// Not a gather, so it must be a scatter; rebuild it the same way.
19709  auto *MSC = cast<MaskedScatterSDNode>(MGS);
19710  SDValue Data = MSC->getValue();
19711  SDValue Ops[] = {Chain, Data, Mask, BasePtr, Index, Scale};
19712  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL,
19713  Ops, MSC->getMemOperand(), IndexType,
19714  MSC->isTruncatingStore());
19715 }
19716 
19717 /// Target-specific DAG combine function for NEON load/store intrinsics
19718 /// to merge base address updates.
// Rewrites a multi-vector NEON load/store intrinsic plus a matching ADD on
// its address into the corresponding post-increment (write-back) node.
// NOTE(review): lines 19719-19720 (the function signature) are missing from
// this rendering.
19721  SelectionDAG &DAG) {
19722  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
19723  return SDValue();
19724 
// For these intrinsics the address is the last operand.
19725  unsigned AddrOpIdx = N->getNumOperands() - 1;
19726  SDValue Addr = N->getOperand(AddrOpIdx);
19727 
19728  // Search for a use of the address operand that is an increment.
19729  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
19730  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
19731  SDNode *User = *UI;
19732  if (User->getOpcode() != ISD::ADD ||
19733  UI.getUse().getResNo() != Addr.getResNo())
19734  continue;
19735 
19736  // Check that the add is independent of the load/store. Otherwise, folding
19737  // it would create a cycle.
// NOTE(review): lines 19738-19739 (the declarations of the Visited set and
// Worklist vector used below) are missing from this rendering.
19740  Visited.insert(Addr.getNode());
19741  Worklist.push_back(N);
19742  Worklist.push_back(User);
19743  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
19744  SDNode::hasPredecessorHelper(User, Visited, Worklist))
19745  continue;
19746 
19747  // Find the new opcode for the updating load/store.
19748  bool IsStore = false;
19749  bool IsLaneOp = false;
19750  bool IsDupOp = false;
19751  unsigned NewOpc = 0;
19752  unsigned NumVecs = 0;
// Operand 1 of an intrinsic node is the intrinsic ID.
19753  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
19754  switch (IntNo) {
19755  default: llvm_unreachable("unexpected intrinsic for Neon base update");
19756  case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
19757  NumVecs = 2; break;
19758  case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
19759  NumVecs = 3; break;
19760  case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
19761  NumVecs = 4; break;
19762  case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
19763  NumVecs = 2; IsStore = true; break;
19764  case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
19765  NumVecs = 3; IsStore = true; break;
19766  case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
19767  NumVecs = 4; IsStore = true; break;
19768  case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
19769  NumVecs = 2; break;
19770  case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
19771  NumVecs = 3; break;
19772  case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
19773  NumVecs = 4; break;
19774  case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
19775  NumVecs = 2; IsStore = true; break;
19776  case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
19777  NumVecs = 3; IsStore = true; break;
19778  case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
19779  NumVecs = 4; IsStore = true; break;
19780  case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
19781  NumVecs = 2; IsDupOp = true; break;
19782  case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
19783  NumVecs = 3; IsDupOp = true; break;
19784  case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
19785  NumVecs = 4; IsDupOp = true; break;
19786  case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
19787  NumVecs = 2; IsLaneOp = true; break;
19788  case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
19789  NumVecs = 3; IsLaneOp = true; break;
19790  case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
19791  NumVecs = 4; IsLaneOp = true; break;
19792  case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
19793  NumVecs = 2; IsStore = true; IsLaneOp = true; break;
19794  case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
19795  NumVecs = 3; IsStore = true; IsLaneOp = true; break;
19796  case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
19797  NumVecs = 4; IsStore = true; IsLaneOp = true; break;
19798  }
19799 
// Vector type: from the stored value for stores, the first result for loads.
19800  EVT VecTy;
19801  if (IsStore)
19802  VecTy = N->getOperand(2).getValueType();
19803  else
19804  VecTy = N->getValueType(0);
19805 
19806  // If the increment is a constant, it must match the memory ref size.
19807  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
19808  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
19809  uint32_t IncVal = CInc->getZExtValue();
19810  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
// Lane/dup forms transfer one element per vector, not the whole vector.
19811  if (IsLaneOp || IsDupOp)
19812  NumBytes /= VecTy.getVectorNumElements();
19813  if (IncVal != NumBytes)
19814  continue;
// XZR as increment selects the immediate post-index encoding.
19815  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
19816  }
// NOTE(review): line 19817 (the declaration of the Ops operand vector) is
// missing from this rendering.
19818  Ops.push_back(N->getOperand(0)); // Incoming chain
19819  // Load lane and store have vector list as input.
19820  if (IsLaneOp || IsStore)
19821  for (unsigned i = 2; i < AddrOpIdx; ++i)
19822  Ops.push_back(N->getOperand(i));
19823  Ops.push_back(Addr); // Base register
19824  Ops.push_back(Inc);
19825 
19826  // Return Types.
19827  EVT Tys[6];
19828  unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
19829  unsigned n;
19830  for (n = 0; n < NumResultVecs; ++n)
19831  Tys[n] = VecTy;
19832  Tys[n++] = MVT::i64; // Type of write back register
19833  Tys[n] = MVT::Other; // Type of the chain
19834  SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
19835 
19836  MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
19837  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
19838  MemInt->getMemoryVT(),
19839  MemInt->getMemOperand());
19840 
19841  // Update the uses.
19842  std::vector<SDValue> NewResults;
19843  for (unsigned i = 0; i < NumResultVecs; ++i) {
19844  NewResults.push_back(SDValue(UpdN.getNode(), i));
19845  }
// Result NumResultVecs is the write-back register; it replaces the ADD via
// the second CombineTo below, so the chain (NumResultVecs + 1) comes next.
19846  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
19847  DCI.CombineTo(N, NewResults);
19848  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
19849 
19850  break;
19851  }
19852  return SDValue();
19853 }
19854 
19855 // Checks to see if the value is the prescribed width and returns information
19856 // about its extension mode.
19857 static
19858 bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
19859  ExtType = ISD::NON_EXTLOAD;
19860  switch(V.getNode()->getOpcode()) {
19861  default:
19862  return false;
19863  case ISD::LOAD: {
19864  LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
19865  if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
19866  || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
19867  ExtType = LoadNode->getExtensionType();
19868  return true;
19869  }
19870  return false;
19871  }
19872  case ISD::AssertSext: {
19873  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
19874  if ((TypeNode->getVT() == MVT::i8 && width == 8)
19875  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
19876  ExtType = ISD::SEXTLOAD;
19877  return true;
19878  }
19879  return false;
19880  }
19881  case ISD::AssertZext: {
19882  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
19883  if ((TypeNode->getVT() == MVT::i8 && width == 8)
19884  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
19885  ExtType = ISD::ZEXTLOAD;
19886  return true;
19887  }
19888  return false;
19889  }
19890  case ISD::Constant:
19891  case ISD::TargetConstant: {
19892  return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
19893  1LL << (width - 1);
19894  }
19895  }
19896 
19897  return true;
19898 }
19899 
19900 // This function does a whole lot of voodoo to determine if the tests are
19901 // equivalent without and with a mask. Essentially what happens is that given a
19902 // DAG resembling:
19903 //
19904 // +-------------+ +-------------+ +-------------+ +-------------+
19905 // | Input | | AddConstant | | CompConstant| | CC |
19906 // +-------------+ +-------------+ +-------------+ +-------------+
19907 // | | | |
19908 // V V | +----------+
19909 // +-------------+ +----+ | |
19910 // | ADD | |0xff| | |
19911 // +-------------+ +----+ | |
19912 // | | | |
19913 // V V | |
19914 // +-------------+ | |
19915 // | AND | | |
19916 // +-------------+ | |
19917 // | | |
19918 // +-----+ | |
19919 // | | |
19920 // V V V
19921 // +-------------+
19922 // | CMP |
19923 // +-------------+
19924 //
19925 // The AND node may be safely removed for some combinations of inputs. In
19926 // particular we need to take into account the extension type of the Input,
19927 // the exact values of AddConstant, CompConstant, and CC, along with the nominal
19928 // width of the input (this can work for any width inputs, the above graph is
19929 // specific to 8 bits.
19930 //
19931 // The specific equations were worked out by generating output tables for each
19932 // AArch64CC value in terms of and AddConstant (w1), CompConstant(w2). The
19933 // problem was simplified by working with 4 bit inputs, which means we only
19934 // needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
19935 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
19936 // patterns present in both extensions (0,7). For every distinct set of
19937 // AddConstant and CompConstants bit patterns we can consider the masked and
19938 // unmasked versions to be equivalent if the result of this function is true for
19939 // all 16 distinct bit patterns of for the current extension type of Input (w0).
19940 //
19941 // sub w8, w0, w1
19942 // and w10, w8, #0x0f
19943 // cmp w8, w2
19944 // cset w9, AArch64CC
19945 // cmp w10, w2
19946 // cset w11, AArch64CC
19947 // cmp w9, w11
19948 // cset w0, eq
19949 // ret
19950 //
19951 // Since the above function shows when the outputs are equivalent it defines
19952 // when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
19953 // would be expensive to run during compiles. The equations below were written
19954 // in a test harness that confirmed they gave equivalent outputs to the above
19955 // for all inputs function, so they can be used determine if the removal is
19956 // legal instead.
19957 //
19958 // isEquivalentMaskless() is the code for testing if the AND can be removed
19959 // factored out of the DAG recognition as the DAG can take several forms.
19960 
19961 static bool isEquivalentMaskless(unsigned CC, unsigned width,
19962  ISD::LoadExtType ExtType, int AddConstant,
19963  int CompConstant) {
19964  // By being careful about our equations and only writing the in term
19965  // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
19966  // make them generally applicable to all bit widths.
19967  int MaxUInt = (1 << width);
19968 
19969  // For the purposes of these comparisons sign extending the type is
19970  // equivalent to zero extending the add and displacing it by half the integer
19971  // width. Provided we are careful and make sure our equations are valid over
19972  // the whole range we can just adjust the input and avoid writing equations
19973  // for sign extended inputs.
19974  if (ExtType == ISD::SEXTLOAD)
19975  AddConstant -= (1 << (width-1));
19976 
19977  switch(CC) {
19978  case AArch64CC::LE:
19979  case AArch64CC::GT:
19980  if ((AddConstant == 0) ||
19981  (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
19982  (AddConstant >= 0 && CompConstant < 0) ||
19983  (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
19984  return true;
19985  break;
19986  case AArch64CC::LT:
19987  case AArch64CC::GE:
19988  if ((AddConstant == 0) ||
19989  (AddConstant >= 0 && CompConstant <= 0) ||
19990  (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
19991  return true;
19992  break;
19993  case AArch64CC::HI:
19994  case AArch64CC::LS:
19995  if ((AddConstant >= 0 && CompConstant < 0) ||
19996  (AddConstant <= 0 && CompConstant >= -1 &&
19997  CompConstant < AddConstant + MaxUInt))
19998  return true;
19999  break;
20000  case AArch64CC::PL:
20001  case AArch64CC::MI:
20002  if ((AddConstant == 0) ||
20003  (AddConstant > 0 && CompConstant <= 0) ||
20004  (AddConstant < 0 && CompConstant <= AddConstant))
20005  return true;
20006  break;
20007  case AArch64CC::LO:
20008  case AArch64CC::HS:
20009  if ((AddConstant >= 0 && CompConstant <= 0) ||
20010  (AddConstant <= 0 && CompConstant >= 0 &&
20011  CompConstant <= AddConstant + MaxUInt))
20012  return true;
20013  break;
20014  case AArch64CC::EQ:
20015  case AArch64CC::NE:
20016  if ((AddConstant > 0 && CompConstant < 0) ||
20017  (AddConstant < 0 && CompConstant >= 0 &&
20018  CompConstant < AddConstant + MaxUInt) ||
20019  (AddConstant >= 0 && CompConstant >= 0 &&
20020  CompConstant >= AddConstant) ||
20021  (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
20022  return true;
20023  break;
20024  case AArch64CC::VS:
20025  case AArch64CC::VC:
20026  case AArch64CC::AL:
20027  case AArch64CC::NV:
20028  return true;
20029  case AArch64CC::Invalid:
20030  break;
20031  }
20032 
20033  return false;
20034 }
20035 
20036 // (X & C) >u Mask --> (X & (C & (~Mask)) != 0
20037 // (X & C) <u Pow2 --> (X & (C & ~(Pow2-1)) == 0
// NOTE(review): line 20038 (the first line of the function signature, with
// the SDNode *N and SDNode *SubsNode parameters) is missing from this
// rendering.
20039  SDNode *AndNode, SelectionDAG &DAG,
20040  unsigned CCIndex, unsigned CmpIndex,
20041  unsigned CC) {
// The SUBS immediate determines which of the two patterns above applies.
20042  ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
20043  if (!SubsC)
20044  return SDValue();
20045 
20046  APInt SubsAP = SubsC->getAPIntValue();
20047  if (CC == AArch64CC::HI) {
// HI (unsigned >) requires a low-bit mask constant.
20048  if (!SubsAP.isMask())
20049  return SDValue();
20050  } else if (CC == AArch64CC::LO) {
// LO (unsigned <) requires a power-of-two constant.
20051  if (!SubsAP.isPowerOf2())
20052  return SDValue();
20053  } else
20054  return SDValue();
20055 
20056  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
20057  if (!AndC)
20058  return SDValue();
20059 
// Effective mask: the mask itself for HI, Pow2-1 for LO.
20060  APInt MaskAP = CC == AArch64CC::HI ? SubsAP : (SubsAP - 1);
20061 
20062  SDLoc DL(N);
// ANDS computes the AND and sets the flags, replacing the AND+SUBS pair.
20063  APInt AndSMask = (~MaskAP) & AndC->getAPIntValue();
20064  SDValue ANDS = DAG.getNode(
20065  AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0),
20066  DAG.getConstant(AndSMask, DL, SubsC->getValueType(0)));
20067  SDValue AArch64_CC =
// NOTE(review): line 20068 (the first getConstant argument selecting the
// replacement condition code) is missing from this rendering.
20069  N->getOperand(CCIndex)->getValueType(0));
20070 
20071  // For now, only performCSELCombine and performBRCONDCombine call this
20072  // function. And both of them pass 2 for CCIndex, 3 for CmpIndex with 4
20073  // operands. So just init the ops direct to simplify the code. If we have some
20074  // other case with different CCIndex, CmpIndex, we need to use for loop to
20075  // rewrite the code here.
20076  // TODO: Do we need to assert number of operand is 4 here?
20077  assert((CCIndex == 2 && CmpIndex == 3) &&
20078  "Expected CCIndex to be 2 and CmpIndex to be 3.");
// Rebuild the CSEL/BRCOND with the new condition and the ANDS flag result.
20079  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
20080  ANDS.getValue(1)};
20081  return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
20082 }
20083 
// Shared combine for flag-consuming nodes (CSEL/BRCOND): the condition code
// lives at operand CCIndex and the flags value at CmpIndex. The flags must
// come from a SUBS whose integer result is unused. First try the SUBS->ANDS
// rewrite above; failing that, remove a superfluous (and X, 0xff/0xffff)
// feeding the SUBS when all inputs provably fit in the masked width.
20084 static
20087  SelectionDAG &DAG, unsigned CCIndex,
20088  unsigned CmpIndex) {
20089  unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
20090  SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
20091  unsigned CondOpcode = SubsNode->getOpcode();
20092 
// Only the flags result (value #1) of the SUBS may be used elsewhere.
20093  if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0))
20094  return SDValue();
20095 
20096  // There is a SUBS feeding this condition. Is it fed by a mask we can
20097  // use?
20098 
20099  SDNode *AndNode = SubsNode->getOperand(0).getNode();
20100  unsigned MaskBits = 0;
20101 
20102  if (AndNode->getOpcode() != ISD::AND)
20103  return SDValue();
20104 
20105  if (SDValue Val = performSubsToAndsCombine(N, SubsNode, AndNode, DAG, CCIndex,
20106  CmpIndex, CC))
20107  return Val;
20108 
// Recognize an 8- or 16-bit mask on the SUBS input.
20109  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
20110  uint32_t CNV = CN->getZExtValue();
20111  if (CNV == 255)
20112  MaskBits = 8;
20113  else if (CNV == 65535)
20114  MaskBits = 16;
20115  }
20116 
20117  if (!MaskBits)
20118  return SDValue();
20119 
20120  SDValue AddValue = AndNode->getOperand(0);
20121 
20122  if (AddValue.getOpcode() != ISD::ADD)
20123  return SDValue();
20124 
20125  // The basic dag structure is correct, grab the inputs and validate them.
20126 
20127  SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
20128  SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
20129  SDValue SubsInputValue = SubsNode->getOperand(1);
20130 
20131  // The mask is present and the provenance of all the values is a smaller type,
20132  // lets see if the mask is superfluous.
20133 
20134  if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
20135  !isa<ConstantSDNode>(SubsInputValue.getNode()))
20136  return SDValue();
20137 
20138  ISD::LoadExtType ExtType;
20139 
// All three inputs must fit in MaskBits with a consistent extension kind.
20140  if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
20141  !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
20142  !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
20143  return SDValue();
20144 
20145  if(!isEquivalentMaskless(CC, MaskBits, ExtType,
20146  cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
20147  cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
20148  return SDValue();
20149 
20150  // The AND is not necessary, remove it.
20151 
20152  SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
20153  SubsNode->getValueType(1));
20154  SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
20155 
20156  SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
20157  DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
20158 
20159  return SDValue(N, 0);
20160 }
20161 
20162 // Optimize compare with zero and branch.
// Folds (brcond EQ/NE (subs/adds x, 0)) into CBZ/CBNZ when the compare's
// integer result is unused and its flags have exactly one use.
20165  SelectionDAG &DAG) {
20166  MachineFunction &MF = DAG.getMachineFunction();
20167  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
20168  // will not be produced, as they are conditional branch instructions that do
20169  // not set flags.
20170  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
20171  return SDValue();
20172 
// Run the shared flags combine first; it may replace N with a rebuilt node.
20173  if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
20174  N = NV.getNode();
20175  SDValue Chain = N->getOperand(0);
20176  SDValue Dest = N->getOperand(1);
20177  SDValue CCVal = N->getOperand(2);
20178  SDValue Cmp = N->getOperand(3);
20179 
20180  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
20181  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
20182  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
20183  return SDValue();
20184 
20185  unsigned CmpOpc = Cmp.getOpcode();
20186  if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
20187  return SDValue();
20188 
20189  // Only attempt folding if there is only one use of the flag and no use of the
20190  // value.
20191  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
20192  return SDValue();
20193 
20194  SDValue LHS = Cmp.getOperand(0);
20195  SDValue RHS = Cmp.getOperand(1);
20196 
20197  assert(LHS.getValueType() == RHS.getValueType() &&
20198  "Expected the value type to be the same for both operands!");
20199  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
20200  return SDValue();
20201 
// Canonicalize the zero to the RHS; the compared value must be against zero.
20202  if (isNullConstant(LHS))
20203  std::swap(LHS, RHS);
20204 
20205  if (!isNullConstant(RHS))
20206  return SDValue();
20207 
// NOTE(review): shifted inputs are deliberately left alone — presumably they
// combine better via other patterns; confirm before changing.
20208  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
20209  LHS.getOpcode() == ISD::SRL)
20210  return SDValue();
20211 
20212  // Fold the compare into the branch instruction.
20213  SDValue BR;
20214  if (CC == AArch64CC::EQ)
20215  BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
20216  else
20217  BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
20218 
20219  // Do not add new nodes to DAG combiner worklist.
20220  DCI.CombineTo(N, BR, false);
20221 
20222  return SDValue();
20223 }
20224 
// Fold the (x == 0) guard around a count-trailing-zeros:
//   CSEL 0, cttz(X), eq(X, 0)  /  CSEL cttz(X), 0, ne(X, 0)
//     -> AND(cttz(X), bitwidth - 1)
// ISD::CTTZ produces the full bit width for a zero input; masking with
// bitwidth-1 maps that to 0 while leaving every smaller result unchanged.
20226  unsigned CC = N->getConstantOperandVal(2);
20227  SDValue SUBS = N->getOperand(3);
20228  SDValue Zero, CTTZ;
20229 
20230  if (CC == AArch64CC::EQ && SUBS.getOpcode() == AArch64ISD::SUBS) {
20231  Zero = N->getOperand(0);
20232  CTTZ = N->getOperand(1);
20233  } else if (CC == AArch64CC::NE && SUBS.getOpcode() == AArch64ISD::SUBS) {
20234  Zero = N->getOperand(1);
20235  CTTZ = N->getOperand(0);
20236  } else
20237  return SDValue();
20238 
// Accept a cttz directly, or a truncate of a cttz.
20239  if ((CTTZ.getOpcode() != ISD::CTTZ && CTTZ.getOpcode() != ISD::TRUNCATE) ||
20240  (CTTZ.getOpcode() == ISD::TRUNCATE &&
20241  CTTZ.getOperand(0).getOpcode() != ISD::CTTZ))
20242  return SDValue();
20243 
20244  assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
20245  "Illegal type in CTTZ folding");
20246 
// The select's other arm and the compare RHS must both be zero.
20247  if (!isNullConstant(Zero) || !isNullConstant(SUBS.getOperand(1)))
20248  return SDValue();
20249 
20250  SDValue X = CTTZ.getOpcode() == ISD::TRUNCATE
20251  ? CTTZ.getOperand(0).getOperand(0)
20252  : CTTZ.getOperand(0);
20253 
// The compare must test the same value that feeds the cttz.
20254  if (X != SUBS.getOperand(0))
20255  return SDValue();
20256 
20257  unsigned BitWidth = CTTZ.getOpcode() == ISD::TRUNCATE
20258  ? CTTZ.getOperand(0).getValueSizeInBits()
20259  : CTTZ.getValueSizeInBits();
20260  SDValue BitWidthMinusOne =
20261  DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
20262  return DAG.getNode(ISD::AND, SDLoc(N), CTTZ.getValueType(), CTTZ,
20263  BitWidthMinusOne);
20264 }
20265 
20266 // (CSEL l r EQ (CMP (CSEL x y cc2 cond) x)) => (CSEL l r cc2 cond)
20267 // (CSEL l r EQ (CMP (CSEL x y cc2 cond) y)) => (CSEL l r !cc2 cond)
20268 // Where x and y are constants and x != y
20269 
20270 // (CSEL l r NE (CMP (CSEL x y cc2 cond) x)) => (CSEL l r !cc2 cond)
20271 // (CSEL l r NE (CMP (CSEL x y cc2 cond) y)) => (CSEL l r cc2 cond)
20272 // Where x and y are constants and x != y
20274  SDValue L = Op->getOperand(0);
20275  SDValue R = Op->getOperand(1);
20276  AArch64CC::CondCode OpCC =
20277  static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2));
20278 
20279  SDValue OpCmp = Op->getOperand(3);
20280  if (!isCMP(OpCmp))
20281  return SDValue();
20282 
20283  SDValue CmpLHS = OpCmp.getOperand(0);
20284  SDValue CmpRHS = OpCmp.getOperand(1);
20285 
// Canonicalize so the inner CSEL ends up on the compare's LHS.
20286  if (CmpRHS.getOpcode() == AArch64ISD::CSEL)
20287  std::swap(CmpLHS, CmpRHS);
20288  else if (CmpLHS.getOpcode() != AArch64ISD::CSEL)
20289  return SDValue();
20290 
20291  SDValue X = CmpLHS->getOperand(0);
20292  SDValue Y = CmpLHS->getOperand(1);
20293  if (!isa<ConstantSDNode>(X) || !isa<ConstantSDNode>(Y) || X == Y) {
20294  return SDValue();
20295  }
20296 
20297  // If one of the constant is opaque constant, x,y sdnode is still different
20298  // but the real value maybe the same. So check APInt here to make sure the
20299  // code is correct.
20300  ConstantSDNode *CX = cast<ConstantSDNode>(X);
20301  ConstantSDNode *CY = cast<ConstantSDNode>(Y);
20302  if (CX->getAPIntValue() == CY->getAPIntValue())
20303  return SDValue();
20304 
// Derive the result condition from the inner CSEL's condition, inverting it
// once when the compare is against y and/or once for an outer NE (per the
// table in the header comment).
20306  static_cast<AArch64CC::CondCode>(CmpLHS->getConstantOperandVal(2));
20307  SDValue Cond = CmpLHS->getOperand(3);
20308 
20309  if (CmpRHS == Y)
20311  else if (CmpRHS != X)
20312  return SDValue();
20313 
20314  if (OpCC == AArch64CC::NE)
20316  else if (OpCC != AArch64CC::EQ)
20317  return SDValue();
20318 
20319  SDLoc DL(Op);
20320  EVT VT = Op->getValueType(0);
20321 
20322  SDValue CCValue = DAG.getConstant(CC, DL, MVT::i32);
20323  return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond);
20324 }
20325 
20326 // Optimize CSEL instructions
// Tries, in order: the trivial same-operand fold, the CSEL-of-CSEL fold, the
// cttz guard fold, then the shared flags combine (performCONDCombine) with
// the condition at operand 2 and the flags at operand 3.
20329  SelectionDAG &DAG) {
20330  // CSEL x, x, cc -> x
20331  if (N->getOperand(0) == N->getOperand(1))
20332  return N->getOperand(0);
20333 
20334  if (SDValue R = foldCSELOfCSEL(N, DAG))
20335  return R;
20336 
20337  // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
20338  // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
20339  if (SDValue Folded = foldCSELofCTTZ(N, DAG))
20340  return Folded;
20341 
20342  return performCONDCombine(N, DCI, DAG, 2, 3);
20343 }
20344 
20345 // Try to re-use an already extended operand of a vector SetCC feeding a
20346 // extended select. Doing so avoids requiring another full extension of the
20347 // SET_CC result when lowering the select.
20349  EVT Op0MVT = Op->getOperand(0).getValueType();
20350  if (!Op0MVT.isVector() || Op->use_empty())
20351  return SDValue();
20352 
20353  // Make sure that all uses of Op are VSELECTs with result matching types where
20354  // the result type has a larger element type than the SetCC operand.
20355  SDNode *FirstUse = *Op->use_begin();
20356  if (FirstUse->getOpcode() != ISD::VSELECT)
20357  return SDValue();
20358  EVT UseMVT = FirstUse->getValueType(0);
20359  if (UseMVT.getScalarSizeInBits() <= Op0MVT.getScalarSizeInBits())
20360  return SDValue();
20361  if (any_of(Op->uses(), [&UseMVT](const SDNode *N) {
20362  return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
20363  }))
20364  return SDValue();
20365 
// Only handle a constant-splat RHS, which is cheap to re-extend below.
20366  APInt V;
20367  if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
20368  return SDValue();
20369 
20370  SDLoc DL(Op);
20371  SDValue Op0ExtV;
20372  SDValue Op1ExtV;
20373  ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
20374  // Check if the first operand of the SET_CC is already extended. If it is,
20375  // split the SET_CC and re-use the extended version of the operand.
20376  SDNode *Op0SExt = DAG.getNodeIfExists(ISD::SIGN_EXTEND, DAG.getVTList(UseMVT),
20377  Op->getOperand(0));
20378  SDNode *Op0ZExt = DAG.getNodeIfExists(ISD::ZERO_EXTEND, DAG.getVTList(UseMVT),
20379  Op->getOperand(0));
// The reused extension kind must agree with the signedness of the compare
// (equality compares are agnostic).
20380  if (Op0SExt && (isSignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
20381  Op0ExtV = SDValue(Op0SExt, 0);
20382  Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
20383  } else if (Op0ZExt && (isUnsignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
20384  Op0ExtV = SDValue(Op0ZExt, 0);
20385  Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
20386  } else
20387  return SDValue();
20388 
// Emit the compare at the wide type so the VSELECT users need no further
// extension of the mask.
20389  return DAG.getNode(ISD::SETCC, DL, UseMVT.changeVectorElementType(MVT::i1),
20390  Op0ExtV, Op1ExtV, Op->getOperand(2));
20391 }
20392 
// DAG combine for ISD::SETCC; each sub-pattern is documented inline below.
20395  SelectionDAG &DAG) {
20396  assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
20397  SDValue LHS = N->getOperand(0);
20398  SDValue RHS = N->getOperand(1);
20399  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
20400  SDLoc DL(N);
20401  EVT VT = N->getValueType(0);
20402 
20403  if (SDValue V = tryToWidenSetCCOperands(N, DAG))
20404  return V;
20405 
20406  // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
20407  if (Cond == ISD::SETNE && isOneConstant(RHS) &&
20408  LHS->getOpcode() == AArch64ISD::CSEL &&
20409  isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
20410  LHS->hasOneUse()) {
20411  // Invert CSEL's condition.
20412  auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
20413  auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
20414  auto NewCond = getInvertedCondCode(OldCond);
20415 
20416  // csel 0, 1, !cond, X
20417  SDValue CSEL =
20418  DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
20419  LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
20420  LHS.getOperand(3));
20421  return DAG.getZExtOrTrunc(CSEL, DL, VT);
20422  }
20423 
20424  // setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
20425  if (Cond == ISD::SETNE && isNullConstant(RHS) &&
20426  LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
20427  LHS->getConstantOperandVal(1) < VT.getScalarSizeInBits() &&
20428  LHS->hasOneUse()) {
20429  EVT TstVT = LHS->getValueType(0);
20430  if (TstVT.isScalarInteger() && TstVT.getFixedSizeInBits() <= 64) {
20431  // this pattern will get better opt in emitComparison
20432  uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
20433  SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
20434  DAG.getConstant(TstImm, DL, TstVT));
20435  return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2));
20436  }
20437  }
20438 
20439  // setcc (iN (bitcast (vNi1 X))), 0, (eq|ne)
20440  // ==> setcc (iN (zext (i1 (vecreduce_or (vNi1 X))))), 0, (eq|ne)
20441  if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
20442  (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
20443  LHS->getOpcode() == ISD::BITCAST) {
20444  EVT ToVT = LHS->getValueType(0);
20445  EVT FromVT = LHS->getOperand(0).getValueType();
20446  if (FromVT.isFixedLengthVector() &&
20447  FromVT.getVectorElementType() == MVT::i1) {
// The bitcast is zero iff no lane of the i1 vector is set, which a
// vecreduce_or expresses directly.
20448  LHS = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, LHS->getOperand(0));
20449  LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ToVT, LHS);
20450  return DAG.getSetCC(DL, VT, LHS, RHS, Cond);
20451  }
20452  }
20453 
20454  // Try to perform the memcmp when the result is tested for [in]equality with 0
20455  if (SDValue V = performOrXorChainCombine(N, DAG))
20456  return V;
20457 
20458  return SDValue();
20459 }
20460 
20461 // Replace a flag-setting operator (eg ANDS) with the generic version
20462 // (eg AND) if the flag is unused.
// Also deduplicates: if an identical generic node already exists, fold it
// into this node's value result so both computations are not kept alive.
20465  unsigned GenericOpcode) {
20466  SDLoc DL(N);
20467  SDValue LHS = N->getOperand(0);
20468  SDValue RHS = N->getOperand(1);
20469  EVT VT = N->getValueType(0);
20470 
20471  // If the flag result isn't used, convert back to a generic opcode.
20472  if (!N->hasAnyUseOfValue(1)) {
20473  SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
// Keep the two-result shape; the (dead) flags slot is filled with zero.
20474  return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
20475  DL);
20476  }
20477 
20478  // Combine identical generic nodes into this node, re-using the result.
20479  if (SDNode *Generic = DCI.DAG.getNodeIfExists(
20480  GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
20481  DCI.CombineTo(Generic, SDValue(N, 0));
20482 
20483  return SDValue();
20484 }
20485 
// Strip a redundant sign_extend/extract_subvector wrapper around an inner
// SETCC_MERGE_ZERO when the predicate patterns prove the inactive lanes are
// already zero:
20487  // setcc_merge_zero pred
20488  // (sign_extend (extract_subvector (setcc_merge_zero ... pred ...))), 0, ne
20489  // => extract_subvector (inner setcc_merge_zero)
20490  SDValue Pred = N->getOperand(0);
20491  SDValue LHS = N->getOperand(1);
20492  SDValue RHS = N->getOperand(2);
20493  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
20494 
20495  if (Cond != ISD::SETNE || !isZerosVector(RHS.getNode()) ||
20496  LHS->getOpcode() != ISD::SIGN_EXTEND)
20497  return SDValue();
20498 
// The extract must be the low subvector and match N's result type.
20499  SDValue Extract = LHS->getOperand(0);
20500  if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
20501  Extract->getValueType(0) != N->getValueType(0) ||
20502  Extract->getConstantOperandVal(1) != 0)
20503  return SDValue();
20504 
20505  SDValue InnerSetCC = Extract->getOperand(0);
20506  if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
20507  return SDValue();
20508 
20509  // By this point we've effectively got
20510  // zero_inactive_lanes_and_trunc_i1(sext_i1(A)). If we can prove A's inactive
20511  // lanes are already zero then the trunc(sext()) sequence is redundant and we
20512  // can operate on A directly.
20513  SDValue InnerPred = InnerSetCC.getOperand(0);
20514  if (Pred.getOpcode() == AArch64ISD::PTRUE &&
20515  InnerPred.getOpcode() == AArch64ISD::PTRUE &&
20516  Pred.getConstantOperandVal(0) == InnerPred.getConstantOperandVal(0) &&
20517  Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
20518  Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
20519  return Extract;
20520 
20521  return SDValue();
20522 }
20523 
// Combine for AArch64ISD::SETCC_MERGE_ZERO nodes; the individual patterns
// are documented inline below.
20524 static SDValue
20526  assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
20527  "Unexpected opcode!");
20528 
20529  SelectionDAG &DAG = DCI.DAG;
20530  SDValue Pred = N->getOperand(0);
20531  SDValue LHS = N->getOperand(1);
20532  SDValue RHS = N->getOperand(2);
20533  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
20534 
20535  if (SDValue V = performSetCCPunpkCombine(N, DAG))
20536  return V;
20537 
20538  if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
20539  LHS->getOpcode() == ISD::SIGN_EXTEND &&
20540  LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) {
20541  // setcc_merge_zero(
20542  // pred, extend(setcc_merge_zero(pred, ...)), != splat(0))
20543  // => setcc_merge_zero(pred, ...)
20544  if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
20545  LHS->getOperand(0)->getOperand(0) == Pred)
20546  return LHS->getOperand(0);
20547 
20548  // setcc_merge_zero(
20549  // all_active, extend(nxvNi1 ...), != splat(0))
20550  // -> nxvNi1 ...
20551  if (isAllActivePredicate(DAG, Pred))
20552  return LHS->getOperand(0);
20553 
20554  // setcc_merge_zero(
20555  // pred, extend(nxvNi1 ...), != splat(0))
20556  // -> nxvNi1 and(pred, ...)
20557  if (DCI.isAfterLegalizeDAG())
20558  // Do this after legalization to allow more folds on setcc_merge_zero
20559  // to be recognized.
20560  return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0),
20561  LHS->getOperand(0), Pred);
20562  }
20563 
20564  return SDValue();
20565 }
20566 
20567 // Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
20568 // as well as whether the test should be inverted. This code is required to
20569 // catch these cases (as opposed to standard dag combines) because
20570 // AArch64ISD::TBZ is matched during legalization.
20571 static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
20572  SelectionDAG &DAG) {
20573 
20574  if (!Op->hasOneUse())
20575  return Op;
20576 
20577  // We don't handle undef/constant-fold cases below, as they should have
20578  // already been taken care of (e.g. and of 0, test of undefined shifted bits,
20579  // etc.)
20580 
20581  // (tbz (trunc x), b) -> (tbz x, b)
20582  // This case is just here to enable more of the below cases to be caught.
20583  if (Op->getOpcode() == ISD::TRUNCATE &&
20584  Bit < Op->getValueType(0).getSizeInBits()) {
20585  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
20586  }
20587 
20588  // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
20589  if (Op->getOpcode() == ISD::ANY_EXTEND &&
20590  Bit < Op->getOperand(0).getValueSizeInBits()) {
20591  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
20592  }
20593 
20594  if (Op->getNumOperands() != 2)
20595  return Op;
20596 
20597  auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
20598  if (!C)
20599  return Op;
20600 
20601  switch (Op->getOpcode()) {
20602  default:
20603  return Op;
20604 
20605  // (tbz (and x, m), b) -> (tbz x, b)
20606  case ISD::AND:
20607  if ((C->getZExtValue() >> Bit) & 1)
20608  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
20609  return Op;
20610 
20611  // (tbz (shl x, c), b) -> (tbz x, b-c)
20612  case ISD::SHL:
20613  if (C->getZExtValue() <= Bit &&
20614  (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
20615  Bit = Bit - C->getZExtValue();
20616  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
20617  }
20618  return Op;
20619 
20620  // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
20621  case ISD::SRA:
20622  Bit = Bit + C->getZExtValue();
20623  if (Bit >= Op->getValueType(0).getSizeInBits())
20624  Bit = Op->getValueType(0).getSizeInBits() - 1;
20625  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
20626 
20627  // (tbz (srl x, c), b) -> (tbz x, b+c)
20628  case ISD::SRL:
20629  if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
20630  Bit = Bit + C->getZExtValue();
20631  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
20632  }
20633  return Op;
20634 
20635  // (tbz (xor x, -1), b) -> (tbnz x, b)
20636  case ISD::XOR:
20637  if ((C->getZExtValue() >> Bit) & 1)
20638  Invert = !Invert;
20639  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
20640  }
20641 }
20642 
20643 // Optimize test single bit zero/non-zero and branch.
// Walks the tested operand through getTestBitOperand; if that produced a new
// operand/bit, rebuild the branch (swapping TBZ<->TBNZ when the walk flipped
// the polarity, e.g. through an XOR with -1).
20646  SelectionDAG &DAG) {
20647  unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
20648  bool Invert = false;
20649  SDValue TestSrc = N->getOperand(1);
20650  SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
20651 
20652  if (TestSrc == NewTestSrc)
20653  return SDValue();
20654 
20655  unsigned NewOpc = N->getOpcode();
20656  if (Invert) {
20657  if (NewOpc == AArch64ISD::TBZ)
20658  NewOpc = AArch64ISD::TBNZ;
20659  else {
20660  assert(NewOpc == AArch64ISD::TBNZ);
20661  NewOpc = AArch64ISD::TBZ;
20662  }
20663  }
20664 
20665  SDLoc DL(N);
20666  return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
20667  DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
20668 }
20669 
20670 // Swap vselect operands where it may allow a predicated operation to achieve
20671 // the `sel`.
20672 //
20673 // (vselect (setcc ( condcode) (_) (_)) (a) (op (a) (b)))
20674 // => (vselect (setcc (!condcode) (_) (_)) (op (a) (b)) (a))
20676  auto SelectA = N->getOperand(1);
20677  auto SelectB = N->getOperand(2);
20678  auto NTy = N->getValueType(0);
20679 
// Only scalable vectors, and only when the condition is a single-use SETCC
// that can be inverted in place.
20680  if (!NTy.isScalableVector())
20681  return SDValue();
20682  SDValue SetCC = N->getOperand(0);
20683  if (SetCC.getOpcode() != ISD::SETCC || !SetCC.hasOneUse())
20684  return SDValue();
20685 
// Candidate FP ops where (op a b) can be merged with a after the swap.
20686  switch (SelectB.getOpcode()) {
20687  default:
20688  return SDValue();
20689  case ISD::FMUL:
20690  case ISD::FSUB:
20691  case ISD::FADD:
20692  break;
20693  }
20694  if (SelectA != SelectB.getOperand(0))
20695  return SDValue();
20696 
// Invert the compare so the two select arms can swap without changing the
// overall result.
20697  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
20698  ISD::CondCode InverseCC =
20700  auto InverseSetCC =
20701  DAG.getSetCC(SDLoc(SetCC), SetCC.getValueType(), SetCC.getOperand(0),
20702  SetCC.getOperand(1), InverseCC);
20703 
20704  return DAG.getNode(ISD::VSELECT, SDLoc(N), NTy,
20705  {InverseSetCC, SelectB, SelectA});
20706 }
20707 
20708 // vselect (v1i1 setcc) ->
20709 // vselect (v1iXX setcc) (XX is the size of the compared operand type)
20710 // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
20711 // condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
20712 // such VSELECT.
20714  if (auto SwapResult = trySwapVSelectOperands(N, DAG))
20715  return SwapResult;
20716 
20717  SDValue N0 = N->getOperand(0);
20718  EVT CCVT = N0.getValueType();
20719 
// A known all-true/all-false predicate selects one arm unconditionally.
20720  if (isAllActivePredicate(DAG, N0))
20721  return N->getOperand(1);
20722 
20723  if (isAllInactivePredicate(N0))
20724  return N->getOperand(2);
20725 
20726  // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
20727  // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
20728  // supported types.
20729  SDValue SetCC = N->getOperand(0);
20730  if (SetCC.getOpcode() == ISD::SETCC &&
20731  SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
20732  SDValue CmpLHS = SetCC.getOperand(0);
20733  EVT VT = CmpLHS.getValueType();
20734  SDNode *CmpRHS = SetCC.getOperand(1).getNode();
20735  SDNode *SplatLHS = N->getOperand(1).getNode();
20736  SDNode *SplatRHS = N->getOperand(2).getNode();
20737  APInt SplatLHSVal;
20738  if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
20739  VT.isSimple() &&
20742  VT.getSimpleVT().SimpleTy) &&
20743  ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
20744  SplatLHSVal.isOne() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
20746  unsigned NumElts = VT.getVectorNumElements();
// Build a splat of (elt-size - 1) for the arithmetic shift.
20748  NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
20749  VT.getScalarType()));
20750  SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
20751 
20752  auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
20753  auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
20754  return Or;
20755  }
20756  }
20757 
20758  if (N0.getOpcode() != ISD::SETCC ||
20760  CCVT.getVectorElementType() != MVT::i1)
20761  return SDValue();
20762 
20763  EVT ResVT = N->getValueType(0);
20764  EVT CmpVT = N0.getOperand(0).getValueType();
20765  // Only combine when the result type is of the same size as the compared
20766  // operands.
20767  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
20768  return SDValue();
20769 
// Re-emit the condition as an integer-element SETCC of the compared type
// (the v1iXX form described in the header comment).
20770  SDValue IfTrue = N->getOperand(1);
20771  SDValue IfFalse = N->getOperand(2);
20772  SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
20773  N0.getOperand(0), N0.getOperand(1),
20774  cast<CondCodeSDNode>(N0.getOperand(2))->get());
20775  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
20776  IfTrue, IfFalse);
20777 }
20778 
20779 /// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
20780 /// the compare-mask instructions rather than going via NZCV, even if LHS and
20781 /// RHS are really scalar. This replaces any scalar setcc in the above pattern
20782 /// with a vector one followed by a DUP shuffle on the result.
20785  SelectionDAG &DAG = DCI.DAG;
20786  SDValue N0 = N->getOperand(0);
20787  EVT ResVT = N->getValueType(0);
20788 
20789  if (N0.getOpcode() != ISD::SETCC)
20790  return SDValue();
20791 
20792  if (ResVT.isScalableVector())
20793  return SDValue();
20794 
20795  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
20796  // scalar SetCCResultType. We also don't expect vectors, because we assume
20797  // that selects fed by vector SETCCs are canonicalized to VSELECT.
20798  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
20799  "Scalar-SETCC feeding SELECT has unexpected result type!");
20800 
20801  // If NumMaskElts == 0, the comparison is larger than select result. The
20802  // largest real NEON comparison is 64-bits per lane, which means the result is
20803  // at most 32-bits and an illegal vector. Just bail out for now.
20804  EVT SrcVT = N0.getOperand(0).getValueType();
20805 
20806  // Don't try to do this optimization when the setcc itself has i1 operands.
20807  // There are no legal vectors of i1, so this would be pointless.
20808  if (SrcVT == MVT::i1)
20809  return SDValue();
20810 
20811  int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
20812  if (!ResVT.isVector() || NumMaskElts == 0)
20813  return SDValue();
20814 
20815  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
20816  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
20817 
20818  // Also bail out if the vector CCVT isn't the same size as ResVT.
20819  // This can happen if the SETCC operand size doesn't divide the ResVT size
20820  // (e.g., f64 vs v3f32).
20821  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
20822  return SDValue();
20823 
20824  // Make sure we didn't create illegal types, if we're not supposed to.
20825  assert(DCI.isBeforeLegalize() ||
20826  DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
20827 
20828  // First perform a vector comparison, where lane 0 is the one we're interested
20829  // in.
20830  SDLoc DL(N0);
20831  SDValue LHS =
20832  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
20833  SDValue RHS =
20834  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
20835  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
20836 
20837  // Now duplicate the comparison mask we want across all other lanes.
// An all-zero shuffle mask broadcasts lane 0 to every lane.
20838  SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
20839  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
20840  Mask = DAG.getNode(ISD::BITCAST, DL,
20842 
20843  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
20844 }
20845 
// Combine for DUP-style nodes: prefer extracting the low half of an existing
// 128-bit version of the same node over keeping a separate 64-bit one, then
// fall through to the post-increment LD1 combine.
20848  EVT VT = N->getValueType(0);
20849  // If "v2i32 DUP(x)" and "v4i32 DUP(x)" both exist, use an extract from the
20850  // 128bit vector version.
20851  if (VT.is64BitVector() && DCI.isAfterLegalizeDAG()) {
20853  if (SDNode *LN = DCI.DAG.getNodeIfExists(
20854  N->getOpcode(), DCI.DAG.getVTList(LVT), {N->getOperand(0)})) {
20855  SDLoc DL(N);
20856  return DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(LN, 0),
20857  DCI.DAG.getConstant(0, DL, MVT::i64));
20858  }
20859  }
20860 
20861  return performPostLD1Combine(N, DCI, false);
20862 }
20863 
20864 /// Get rid of unnecessary NVCASTs (that don't change the type).
// A cast whose result type equals its input type is a no-op; forward the
// input value directly.
20866  if (N->getValueType(0) == N->getOperand(0).getValueType())
20867  return N->getOperand(0);
20868 
20869  return SDValue();
20870 }
20871 
20872 // If all users of the globaladdr are of the form (globaladdr + constant), find
20873 // the smallest constant, fold it into the globaladdr's offset and rewrite the
20874 // globaladdr as (globaladdr + constant) - constant.
20876  const AArch64Subtarget *Subtarget,
20877  const TargetMachine &TM) {
20878  auto *GN = cast<GlobalAddressSDNode>(N);
20879  if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
20881  return SDValue();
20882 
// Every use must be an add of a constant; track the smallest addend.
20883  uint64_t MinOffset = -1ull;
20884  for (SDNode *N : GN->uses()) {
20885  if (N->getOpcode() != ISD::ADD)
20886  return SDValue();
20887  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
20888  if (!C)
20889  C = dyn_cast<ConstantSDNode>(N->getOperand(1));
20890  if (!C)
20891  return SDValue();
20892  MinOffset = std::min(MinOffset, C->getZExtValue());
20893  }
20894  uint64_t Offset = MinOffset + GN->getOffset();
20895 
20896  // Require that the new offset is larger than the existing one. Otherwise, we
20897  // can end up oscillating between two possible DAGs, for example,
20898  // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
20899  if (Offset <= uint64_t(GN->getOffset()))
20900  return SDValue();
20901 
20902  // Check whether folding this offset is legal. It must not go out of bounds of
20903  // the referenced object to avoid violating the code model, and must be
20904  // smaller than 2^20 because this is the largest offset expressible in all
20905  // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
20906  // stores an immediate signed 21 bit offset.)
20907  //
20908  // This check also prevents us from folding negative offsets, which will end
20909  // up being treated in the same way as large positive ones. They could also
20910  // cause code model violations, and aren't really common enough to matter.
20911  if (Offset >= (1 << 20))
20912  return SDValue();
20913 
20914  const GlobalValue *GV = GN->getGlobal();
20915  Type *T = GV->getValueType();
20916  if (!T->isSized() ||
20918  return SDValue();
20919 
20920  SDLoc DL(GN);
20921  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
20922  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
20923  DAG.getConstant(MinOffset, DL, MVT::i64));
20924 }
20925 
// With CSSC available, a count of a bit-reversed scalar folds to a CTTZ of
// the original value: (N (bitreverse x)) -> (cttz x).
// NOTE(review): N is expected to be a CTLZ node here — its opcode is not
// checked locally; confirm against the caller.
20927  const AArch64Subtarget *Subtarget) {
20928  SDValue BR = N->getOperand(0);
20929  if (!Subtarget->hasCSSC() || BR.getOpcode() != ISD::BITREVERSE ||
20930  !BR.getValueType().isScalarInteger())
20931  return SDValue();
20932 
20933  SDLoc DL(N);
20934  return DAG.getNode(ISD::CTTZ, DL, BR.getValueType(), BR.getOperand(0));
20935 }
20936 
20937 // Turns the vector of indices into a vector of byte offsets by scaling Offset
20938 // by (BitWidth / 8).
20940  SDLoc DL, unsigned BitWidth) {
20941  assert(Offset.getValueType().isScalableVector() &&
20942  "This method is only for scalable vectors of offsets");
20943 
20945  SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
20946 
20947  return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
20948 }
20949 
20950 /// Check if the value of \p OffsetInBytes can be used as an immediate for
20951 /// the gather load/prefetch and scatter store instructions with vector base and
20952 /// immediate offset addressing mode:
20953 ///
20954 /// [<Zn>.[S|D]{, #<imm>}]
20955 ///
20956 /// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
                                                  unsigned ScalarSizeInBytes) {
  // A usable immediate must be an exact multiple of the element size...
  if (OffsetInBytes % ScalarSizeInBytes != 0)
    return false;

  // ...and the scaled index must fit the 5-bit field, i.e. k in [0, 31].
  const unsigned ScaledIndex = OffsetInBytes / ScalarSizeInBytes;
  return ScaledIndex <= 31;
}
20969 
20970 /// Check if the value of \p Offset represents a valid immediate for the SVE
20971 /// gather load/prefetch and scatter store instructions with vector base and
20972 /// immediate offset addressing mode:
20973 ///
20974 /// [<Zn>.[S|D]{, #<imm>}]
20975 ///
20976 /// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
20978  unsigned ScalarSizeInBytes) {
20979  ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
20980  return OffsetConst && isValidImmForSVEVecImmAddrMode(
20981  OffsetConst->getZExtValue(), ScalarSizeInBytes);
20982 }
20983 
20985  unsigned Opcode,
20986  bool OnlyPackedOffsets = true) {
20987  const SDValue Src = N->getOperand(2);
20988  const EVT SrcVT = Src->getValueType(0);
20989  assert(SrcVT.isScalableVector() &&
20990  "Scatter stores are only possible for SVE vectors");
20991 
20992  SDLoc DL(N);
20993  MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
20994 
20995  // Make sure that source data will fit into an SVE register
20997  return SDValue();
20998 
20999  // For FPs, ACLE only supports _packed_ single and double precision types.
21000  if (SrcElVT.isFloatingPoint())
21001  if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
21002  return SDValue();
21003 
21004  // Depending on the addressing mode, this is either a pointer or a vector of
21005  // pointers (that fits into one register)
21006  SDValue Base = N->getOperand(4);
21007  // Depending on the addressing mode, this is either a single offset or a
21008  // vector of offsets (that fits into one register)
21009  SDValue Offset = N->getOperand(5);
21010 
21011  // For "scalar + vector of indices", just scale the indices. This only
21012  // applies to non-temporal scatters because there's no instruction that takes
21013  // indicies.
21014  if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
21015  Offset =
21017  Opcode = AArch64ISD::SSTNT1_PRED;
21018  }
21019 
21020  // In the case of non-temporal gather loads there's only one SVE instruction
21021  // per data-size: "scalar + vector", i.e.
21022  // * stnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
21023  // Since we do have intrinsics that allow the arguments to be in a different
21024  // order, we may need to swap them to match the spec.
21025  if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
21026  std::swap(Base, Offset);
21027 
21028  // SST1_IMM requires that the offset is an immediate that is:
21029  // * a multiple of #SizeInBytes,
21030  // * in the range [0, 31 x #SizeInBytes],
21031  // where #SizeInBytes is the size in bytes of the stored items. For
21032  // immediates outside that range and non-immediate scalar offsets use SST1 or
21033  // SST1_UXTW instead.
21034  if (Opcode == AArch64ISD::SST1_IMM_PRED) {
21036  SrcVT.getScalarSizeInBits() / 8)) {
21037  if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
21038  Opcode = AArch64ISD::SST1_UXTW_PRED;
21039  else
21040  Opcode = AArch64ISD::SST1_PRED;
21041 
21042  std::swap(Base, Offset);
21043  }
21044  }
21045 
21046  auto &TLI = DAG.getTargetLoweringInfo();
21047  if (!TLI.isTypeLegal(Base.getValueType()))
21048  return SDValue();
21049 
21050  // Some scatter store variants allow unpacked offsets, but only as nxv2i32
21051  // vectors. These are implicitly sign (sxtw) or zero (zxtw) extend to
21052  // nxv2i64. Legalize accordingly.
21053  if (!OnlyPackedOffsets &&
21054  Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
21056 
21057  if (!TLI.isTypeLegal(Offset.getValueType()))
21058  return SDValue();
21059 
21060  // Source value type that is representable in hardware
21061  EVT HwSrcVt = getSVEContainerType(SrcVT);
21062 
21063  // Keep the original type of the input data to store - this is needed to be
21064  // able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
21065  // FP values we want the integer equivalent, so just use HwSrcVt.
21066  SDValue InputVT = DAG.getValueType(SrcVT);
21067  if (SrcVT.isFloatingPoint())
21068  InputVT = DAG.getValueType(HwSrcVt);
21069 
21070  SDVTList VTs = DAG.getVTList(MVT::Other);
21071  SDValue SrcNew;
21072 
21073  if (Src.getValueType().isFloatingPoint())
21074  SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
21075  else
21076  SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
21077 
21078  SDValue Ops[] = {N->getOperand(0), // Chain
21079  SrcNew,
21080  N->getOperand(3), // Pg
21081  Base,
21082  Offset,
21083  InputVT};
21084 
21085  return DAG.getNode(Opcode, DL, VTs, Ops);
21086 }
21087 
21089  unsigned Opcode,
21090  bool OnlyPackedOffsets = true) {
21091  const EVT RetVT = N->getValueType(0);
21092  assert(RetVT.isScalableVector() &&
21093  "Gather loads are only possible for SVE vectors");
21094 
21095  SDLoc DL(N);
21096 
21097  // Make sure that the loaded data will fit into an SVE register
21099  return SDValue();
21100 
21101  // Depending on the addressing mode, this is either a pointer or a vector of
21102  // pointers (that fits into one register)
21103  SDValue Base = N->getOperand(3);
21104  // Depending on the addressing mode, this is either a single offset or a
21105  // vector of offsets (that fits into one register)
21106  SDValue Offset = N->getOperand(4);
21107 
21108  // For "scalar + vector of indices", just scale the indices. This only
21109  // applies to non-temporal gathers because there's no instruction that takes
21110  // indicies.
21111  if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
21113  RetVT.getScalarSizeInBits());
21115  }
21116 
21117  // In the case of non-temporal gather loads there's only one SVE instruction
21118  // per data-size: "scalar + vector", i.e.
21119  // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
21120  // Since we do have intrinsics that allow the arguments to be in a different
21121  // order, we may need to swap them to match the spec.
21122  if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
21123  Offset.getValueType().isVector())
21124  std::swap(Base, Offset);
21125 
21126  // GLD{FF}1_IMM requires that the offset is an immediate that is:
21127  // * a multiple of #SizeInBytes,
21128  // * in the range [0, 31 x #SizeInBytes],
21129  // where #SizeInBytes is the size in bytes of the loaded items. For
21130  // immediates outside that range and non-immediate scalar offsets use
21131  // GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
21132  if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
21135  RetVT.getScalarSizeInBits() / 8)) {
21136  if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
21137  Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
21140  else
21141  Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
21144 
21145  std::swap(Base, Offset);
21146  }
21147  }
21148 
21149  auto &TLI = DAG.getTargetLoweringInfo();
21150  if (!TLI.isTypeLegal(Base.getValueType()))
21151  return SDValue();
21152 
21153  // Some gather load variants allow unpacked offsets, but only as nxv2i32
21154  // vectors. These are implicitly sign (sxtw) or zero (zxtw) extend to
21155  // nxv2i64. Legalize accordingly.
21156  if (!OnlyPackedOffsets &&
21157  Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
21159 
21160  // Return value type that is representable in hardware
21161  EVT HwRetVt = getSVEContainerType(RetVT);
21162 
21163  // Keep the original output value type around - this is needed to be able to
21164  // select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
21165  // values we want the integer equivalent, so just use HwRetVT.
21166  SDValue OutVT = DAG.getValueType(RetVT);
21167  if (RetVT.isFloatingPoint())
21168  OutVT = DAG.getValueType(HwRetVt);
21169 
21170  SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
21171  SDValue Ops[] = {N->getOperand(0), // Chain
21172  N->getOperand(2), // Pg
21173  Base, Offset, OutVT};
21174 
21175  SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
21176  SDValue LoadChain = SDValue(Load.getNode(), 1);
21177 
21178  if (RetVT.isInteger() && (RetVT != HwRetVt))
21179  Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
21180 
21181  // If the original return value was FP, bitcast accordingly. Doing it here
21182  // means that we can avoid adding TableGen patterns for FPs.
21183  if (RetVT.isFloatingPoint())
21184  Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
21185 
21186  return DAG.getMergeValues({Load, LoadChain}, DL);
21187 }
21188 
21189 static SDValue
21191  SelectionDAG &DAG) {
21192  SDLoc DL(N);
21193  SDValue Src = N->getOperand(0);
21194  unsigned Opc = Src->getOpcode();
21195 
21196  // Sign extend of an unsigned unpack -> signed unpack
21197  if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
21198 
21199  unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
21201 
21202  // Push the sign extend to the operand of the unpack
21203  // This is necessary where, for example, the operand of the unpack
21204  // is another unpack:
21205  // 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
21206  // ->
21207  // 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
21208  // ->
21209  // 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
21210  SDValue ExtOp = Src->getOperand(0);
21211  auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
21212  EVT EltTy = VT.getVectorElementType();
21213  (void)EltTy;
21214 
21215  assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
21216  "Sign extending from an invalid type");
21217 
21218  EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
21219 
21221  ExtOp, DAG.getValueType(ExtVT));
21222 
21223  return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
21224  }
21225 
21226  if (DCI.isBeforeLegalizeOps())
21227  return SDValue();
21228 
21230  return SDValue();
21231 
21232  // SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
21233  // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
21234  unsigned NewOpc;
21235  unsigned MemVTOpNum = 4;
21236  switch (Opc) {
21238  NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
21239  MemVTOpNum = 3;
21240  break;
21243  MemVTOpNum = 3;
21244  break;
21247  MemVTOpNum = 3;
21248  break;
21251  break;
21254  break;
21257  break;
21260  break;
21263  break;
21266  break;
21269  break;
21272  break;
21275  break;
21278  break;
21281  break;
21284  break;
21287  break;
21290  break;
21293  break;
21294  default:
21295  return SDValue();
21296  }
21297 
21298  EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
21299  EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
21300 
21301  if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
21302  return SDValue();
21303 
21304  EVT DstVT = N->getValueType(0);
21305  SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
21306 
21308  for (unsigned I = 0; I < Src->getNumOperands(); ++I)
21309  Ops.push_back(Src->getOperand(I));
21310 
21311  SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
21312  DCI.CombineTo(N, ExtLoad);
21313  DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
21314 
21315  // Return N so it doesn't get rechecked
21316  return SDValue(N, 0);
21317 }
21318 
21319 /// Legalize the gather prefetch (scalar + vector addressing mode) when the
21320 /// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
21321 /// != nxv2i32) do not need legalization.
21323  const unsigned OffsetPos = 4;
21324  SDValue Offset = N->getOperand(OffsetPos);
21325 
21326  // Not an unpacked vector, bail out.
21327  if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
21328  return SDValue();
21329 
21330  // Extend the unpacked offset vector to 64-bit lanes.
21331  SDLoc DL(N);
21333  SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
21334  // Replace the offset operand with the 64-bit one.
21335  Ops[OffsetPos] = Offset;
21336 
21337  return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
21338 }
21339 
21340 /// Combines a node carrying the intrinsic
21341 /// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
21342 /// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
21343 /// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
21344 /// sve gather prefetch instruction with vector plus immediate addressing mode.
21346  unsigned ScalarSizeInBytes) {
21347  const unsigned ImmPos = 4, OffsetPos = 3;
21348  // No need to combine the node if the immediate is valid...
21349  if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
21350  return SDValue();
21351 
21352  // ...otherwise swap the offset base with the offset...
21353  SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
21354  std::swap(Ops[ImmPos], Ops[OffsetPos]);
21355  // ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
21356  // `aarch64_sve_prfb_gather_uxtw_index`.
21357  SDLoc DL(N);
21358  Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
21359  MVT::i64);
21360 
21361  return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
21362 }
21363 
21364 // Return true if the vector operation can guarantee only the first lane of its
21365 // result contains data, with all bits in other lanes set to zero.
21367  switch (Op.getOpcode()) {
21368  default:
21369  return false;
21370  case AArch64ISD::ANDV_PRED:
21371  case AArch64ISD::EORV_PRED:
21378  case AArch64ISD::ORV_PRED:
21385  return true;
21386  }
21387 }
21388 
21390  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
21391  SDValue InsertVec = N->getOperand(0);
21392  SDValue InsertElt = N->getOperand(1);
21393  SDValue InsertIdx = N->getOperand(2);
21394 
21395  // We only care about inserts into the first element...
21396  if (!isNullConstant(InsertIdx))
21397  return SDValue();
21398  // ...of a zero'd vector...
21399  if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
21400  return SDValue();
21401  // ...where the inserted data was previously extracted...
21402  if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
21403  return SDValue();
21404 
21405  SDValue ExtractVec = InsertElt.getOperand(0);
21406  SDValue ExtractIdx = InsertElt.getOperand(1);
21407 
21408  // ...from the first element of a vector.
21409  if (!isNullConstant(ExtractIdx))
21410  return SDValue();
21411 
21412  // If we get here we are effectively trying to zero lanes 1-N of a vector.
21413 
21414  // Ensure there's no type conversion going on.
21415  if (N->getValueType(0) != ExtractVec.getValueType())
21416  return SDValue();
21417 
21418  if (!isLanes1toNKnownZero(ExtractVec))
21419  return SDValue();
21420 
21421  // The explicit zeroing is redundant.
21422  return ExtractVec;
21423 }
21424 
21425 static SDValue
21428  return Res;
21429 
21430  return performPostLD1Combine(N, DCI, true);
21431 }
21432 
21434  EVT Ty = N->getValueType(0);
21435  if (Ty.isInteger())
21436  return SDValue();
21437 
21439  EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
21440  if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
21442  return SDValue();
21443 
21444  SDLoc DL(N);
21445  SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
21446  DL, ExtIntTy);
21447  SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
21448  DL, ExtIntTy);
21449  SDValue Idx = N->getOperand(2);
21450  SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
21451  SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
21452  return DAG.getBitcast(Ty, Trunc);
21453 }
21454 
21457  const AArch64Subtarget *Subtarget) {
21458  SDValue N0 = N->getOperand(0);
21459  EVT VT = N->getValueType(0);
21460 
21461  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
21462  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND)
21463  return SDValue();
21464 
21465  auto hasValidElementTypeForFPExtLoad = [](EVT VT) {
21466  EVT EltVT = VT.getVectorElementType();
21467  return EltVT == MVT::f32 || EltVT == MVT::f64;
21468  };
21469 
21470  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
21471  // We purposefully don't care about legality of the nodes here as we know
21472  // they can be split down into something legal.
21473  if (DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(N0.getNode()) &&
21474  N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
21475  VT.isFixedLengthVector() && hasValidElementTypeForFPExtLoad(VT) &&
21476  VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) {
21477  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
21478  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
21479  LN0->getChain(), LN0->getBasePtr(),
21480  N0.getValueType(), LN0->getMemOperand());
21481  DCI.CombineTo(N, ExtLoad);
21482  DCI.CombineTo(
21483  N0.getNode(),
21484  DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
21485  DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
21486  ExtLoad.getValue(1));
21487  return SDValue(N, 0); // Return N so it doesn't get rechecked!
21488  }
21489 
21490  return SDValue();
21491 }
21492 
21494  const AArch64Subtarget *Subtarget) {
21495  EVT VT = N->getValueType(0);
21496 
21497  // Don't expand for NEON, SVE2 or SME
21498  if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
21499  return SDValue();
21500 
21501  SDLoc DL(N);
21502 
21503  SDValue Mask = N->getOperand(0);
21504  SDValue In1 = N->getOperand(1);
21505  SDValue In2 = N->getOperand(2);
21506 
21507  SDValue InvMask = DAG.getNOT(DL, Mask, VT);
21508  SDValue Sel = DAG.getNode(ISD::AND, DL, VT, Mask, In1);
21509  SDValue SelInv = DAG.getNode(ISD::AND, DL, VT, InvMask, In2);
21510  return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv);
21511 }
21512 
21514  EVT VT = N->getValueType(0);
21515 
21516  SDValue Insert = N->getOperand(0);
21517  if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR)
21518  return SDValue();
21519 
21520  if (!Insert.getOperand(0).isUndef())
21521  return SDValue();
21522 
21523  uint64_t IdxInsert = Insert.getConstantOperandVal(2);
21524  uint64_t IdxDupLane = N->getConstantOperandVal(1);
21525  if (IdxInsert != 0 || IdxDupLane != 0)
21526  return SDValue();
21527 
21528  SDValue Bitcast = Insert.getOperand(1);
21529  if (Bitcast.getOpcode() != ISD::BITCAST)
21530  return SDValue();
21531 
21532  SDValue Subvec = Bitcast.getOperand(0);
21533  EVT SubvecVT = Subvec.getValueType();
21534  if (!SubvecVT.is128BitVector())
21535  return SDValue();
21536  EVT NewSubvecVT =
21538 
21539  SDLoc DL(N);
21540  SDValue NewInsert =
21541  DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewSubvecVT,
21542  DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
21543  SDValue NewDuplane128 = DAG.getNode(AArch64ISD::DUPLANE128, DL, NewSubvecVT,
21544  NewInsert, N->getOperand(1));
21545  return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128);
21546 }
21547 
21549  DAGCombinerInfo &DCI) const {
21550  SelectionDAG &DAG = DCI.DAG;
21551  switch (N->getOpcode()) {
21552  default:
21553  LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
21554  break;
21555  case ISD::ADD:
21556  case ISD::SUB:
21557  return performAddSubCombine(N, DCI, DAG);
21558  case ISD::BUILD_VECTOR:
21559  return performBuildVectorCombine(N, DCI, DAG);
21560  case ISD::TRUNCATE:
21561  return performTruncateCombine(N, DAG);
21562  case AArch64ISD::ANDS:
21563  return performFlagSettingCombine(N, DCI, ISD::AND);
21564  case AArch64ISD::ADC:
21565  if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
21566  return R;
21567  return foldADCToCINC(N, DAG);
21568  case AArch64ISD::SBC:
21569  return foldOverflowCheck(N, DAG, /* IsAdd */ false);
21570  case AArch64ISD::ADCS:
21571  if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
21572  return R;
21574  case AArch64ISD::SBCS:
21575  if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
21576  return R;
21578  case ISD::XOR:
21579  return performXorCombine(N, DAG, DCI, Subtarget);
21580  case ISD::MUL:
21581  return performMulCombine(N, DAG, DCI, Subtarget);
21582  case ISD::SINT_TO_FP:
21583  case ISD::UINT_TO_FP:
21584  return performIntToFpCombine(N, DAG, Subtarget);
21585  case ISD::FP_TO_SINT:
21586  case ISD::FP_TO_UINT:
21587  case ISD::FP_TO_SINT_SAT:
21588  case ISD::FP_TO_UINT_SAT:
21589  return performFpToIntCombine(N, DAG, DCI, Subtarget);
21590  case ISD::FDIV:
21591  return performFDivCombine(N, DAG, DCI, Subtarget);
21592  case ISD::OR:
21593  return performORCombine(N, DCI, Subtarget, *this);
21594  case ISD::AND:
21595  return performANDCombine(N, DCI);
21597  return performIntrinsicCombine(N, DCI, Subtarget);
21598  case ISD::ANY_EXTEND:
21599  case ISD::ZERO_EXTEND:
21600  case ISD::SIGN_EXTEND:
21601  return performExtendCombine(N, DCI, DAG);
21603  return performSignExtendInRegCombine(N, DCI, DAG);
21604  case ISD::CONCAT_VECTORS:
21605  return performConcatVectorsCombine(N, DCI, DAG);
21607  return performExtractSubvectorCombine(N, DCI, DAG);
21608  case ISD::INSERT_SUBVECTOR:
21609  return performInsertSubvectorCombine(N, DCI, DAG);
21610  case ISD::SELECT:
21611  return performSelectCombine(N, DCI);
21612  case ISD::VSELECT:
21613  return performVSelectCombine(N, DCI.DAG);
21614  case ISD::SETCC:
21615  return performSETCCCombine(N, DCI, DAG);
21616  case ISD::LOAD:
21617  return performLOADCombine(N, DCI, DAG, Subtarget);
21618  case ISD::STORE:
21619  return performSTORECombine(N, DCI, DAG, Subtarget);
21620  case ISD::MSTORE:
21621  return performMSTORECombine(N, DCI, DAG, Subtarget);
21622  case ISD::MGATHER:
21623  case ISD::MSCATTER:
21624  return performMaskedGatherScatterCombine(N, DCI, DAG);
21625  case ISD::VECTOR_SPLICE:
21626  return performSVESpliceCombine(N, DAG);
21627  case ISD::FP_EXTEND:
21628  return performFPExtendCombine(N, DAG, DCI, Subtarget);
21629  case AArch64ISD::BRCOND:
21630  return performBRCONDCombine(N, DCI, DAG);
21631  case AArch64ISD::TBNZ:
21632  case AArch64ISD::TBZ:
21633  return performTBZCombine(N, DCI, DAG);
21634  case AArch64ISD::CSEL:
21635  return performCSELCombine(N, DCI, DAG);
21636  case AArch64ISD::DUP:
21637  return performDUPCombine(N, DCI);
21639  return performDupLane128Combine(N, DAG);
21640  case AArch64ISD::NVCAST:
21641  return performNVCASTCombine(N);
21642  case AArch64ISD::SPLICE:
21643  return performSpliceCombine(N, DAG);
21644  case AArch64ISD::UUNPKLO:
21645  case AArch64ISD::UUNPKHI:
21646  return performUnpackCombine(N, DAG, Subtarget);
21647  case AArch64ISD::UZP1:
21648  return performUzpCombine(N, DAG);
21650  return performSetccMergeZeroCombine(N, DCI);
21667  return performGLD1Combine(N, DAG);
21668  case AArch64ISD::VASHR:
21669  case AArch64ISD::VLSHR:
21670  return performVectorShiftCombine(N, *this, DCI);
21671  case AArch64ISD::SUNPKLO:
21672  return performSunpkloCombine(N, DAG);
21673  case AArch64ISD::BSP:
21674  return performBSPExpandForSVE(N, DAG, Subtarget);
21676  return performInsertVectorEltCombine(N, DCI);
21678  return performExtractVectorEltCombine(N, DCI, Subtarget);
21679  case ISD::VECREDUCE_ADD:
21680  return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
21681  case AArch64ISD::UADDV:
21682  return performUADDVCombine(N, DAG);
21683  case AArch64ISD::SMULL:
21684  case AArch64ISD::UMULL:
21685  case AArch64ISD::PMULL:
21687  case ISD::INTRINSIC_VOID:
21689  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
21690  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
21691  return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
21692  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
21693  return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
21694  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
21695  return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
21696  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
21697  return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
21698  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
21699  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
21700  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
21701  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
21702  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
21703  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
21704  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
21705  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
21706  return legalizeSVEGatherPrefetchOffsVec(N, DAG);
21707  case Intrinsic::aarch64_neon_ld2:
21708  case Intrinsic::aarch64_neon_ld3:
21709  case Intrinsic::aarch64_neon_ld4:
21710  case Intrinsic::aarch64_neon_ld1x2:
21711  case Intrinsic::aarch64_neon_ld1x3:
21712  case Intrinsic::aarch64_neon_ld1x4:
21713  case Intrinsic::aarch64_neon_ld2lane:
21714  case Intrinsic::aarch64_neon_ld3lane:
21715  case Intrinsic::aarch64_neon_ld4lane:
21716  case Intrinsic::aarch64_neon_ld2r:
21717  case Intrinsic::aarch64_neon_ld3r:
21718  case Intrinsic::aarch64_neon_ld4r:
21719  case Intrinsic::aarch64_neon_st2:
21720  case Intrinsic::aarch64_neon_st3:
21721  case Intrinsic::aarch64_neon_st4:
21722  case Intrinsic::aarch64_neon_st1x2:
21723  case Intrinsic::aarch64_neon_st1x3:
21724  case Intrinsic::aarch64_neon_st1x4:
21725  case Intrinsic::aarch64_neon_st2lane:
21726  case Intrinsic::aarch64_neon_st3lane:
21727  case Intrinsic::aarch64_neon_st4lane:
21728  return performNEONPostLDSTCombine(N, DCI, DAG);
21729  case Intrinsic::aarch64_sve_ldnt1:
21730  return performLDNT1Combine(N, DAG);
21731  case Intrinsic::aarch64_sve_ld1rq:
21732  return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
21733  case Intrinsic::aarch64_sve_ld1ro:
21734  return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
21735  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
21737  case Intrinsic::aarch64_sve_ldnt1_gather:
21739  case Intrinsic::aarch64_sve_ldnt1_gather_index:
21740  return performGatherLoadCombine(N, DAG,
21742  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
21744  case Intrinsic::aarch64_sve_ld1:
21746  case Intrinsic::aarch64_sve_ldnf1:
21748  case Intrinsic::aarch64_sve_ldff1:
21750  case Intrinsic::aarch64_sve_st1:
21751  return performST1Combine(N, DAG);
21752  case Intrinsic::aarch64_sve_stnt1:
21753  return performSTNT1Combine(N, DAG);
21754  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
21756  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
21758  case Intrinsic::aarch64_sve_stnt1_scatter:
21760  case Intrinsic::aarch64_sve_stnt1_scatter_index:
21762  case Intrinsic::aarch64_sve_ld1_gather:
21764  case Intrinsic::aarch64_sve_ld1_gather_index:
21765  return performGatherLoadCombine(N, DAG,
21767  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
21769  /*OnlyPackedOffsets=*/false);
21770  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
21772  /*OnlyPackedOffsets=*/false);
21773  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
21774  return performGatherLoadCombine(N, DAG,
21776  /*OnlyPackedOffsets=*/false);
21777  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
21778  return performGatherLoadCombine(N, DAG,
21780  /*OnlyPackedOffsets=*/false);
21781  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
21783  case Intrinsic::aarch64_sve_ldff1_gather:
21785  case Intrinsic::aarch64_sve_ldff1_gather_index:
21786  return performGatherLoadCombine(N, DAG,
21788  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
21789  return performGatherLoadCombine(N, DAG,
21791  /*OnlyPackedOffsets=*/false);
21792  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
21793  return performGatherLoadCombine(N, DAG,
21795  /*OnlyPackedOffsets=*/false);
21796  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
21797  return performGatherLoadCombine(N, DAG,
21799  /*OnlyPackedOffsets=*/false);
21800  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
21801  return performGatherLoadCombine(N, DAG,
21803  /*OnlyPackedOffsets=*/false);
21804  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
21805  return performGatherLoadCombine(N, DAG,
21807  case Intrinsic::aarch64_sve_st1_scatter:
21809  case Intrinsic::aarch64_sve_st1_scatter_index:
21811  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
21813  /*OnlyPackedOffsets=*/false);
21814  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
21816  /*OnlyPackedOffsets=*/false);
21817  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
21818  return performScatterStoreCombine(N, DAG,
21820  /*OnlyPackedOffsets=*/false);
21821  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
21822  return performScatterStoreCombine(N, DAG,
21824  /*OnlyPackedOffsets=*/false);
21825  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
21827  case Intrinsic::aarch64_rndr:
21828  case Intrinsic::aarch64_rndrrs: {
21829  unsigned IntrinsicID =
21830  cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
21831  auto Register =
21832  (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
21833  : AArch64SysReg::RNDRRS);
21834  SDLoc DL(N);
21835  SDValue A = DAG.getNode(
21837  N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
21838  SDValue B = DAG.getNode(
21840  DAG.getConstant(0, DL, MVT::i32),
21841  DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
21842  return DAG.getMergeValues(
21843  {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
21844  }
21845  default:
21846  break;
21847  }
21848  break;
21849  case ISD::GlobalAddress:
21850  return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
21851  case ISD::CTLZ:
21852  return performCTLZCombine(N, DAG, Subtarget);
21853  }
21854  return SDValue();
21855 }
21856 
21857 // Check if the return value is used as only a return value, as otherwise
21858 // we can't perform a tail-call. In particular, we need to check for
21859 // target ISD nodes that are returns and any other "odd" constructs
21860 // that the generic analysis code won't necessarily catch.
21861 bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
21862  SDValue &Chain) const {
21863  if (N->getNumValues() != 1)
21864  return false;
21865  if (!N->hasNUsesOfValue(1, 0))
21866  return false;
21867 
21868  SDValue TCChain = Chain;
21869  SDNode *Copy = *N->use_begin();
21870  if (Copy->getOpcode() == ISD::CopyToReg) {
21871  // If the copy has a glue operand, we conservatively assume it isn't safe to
21872  // perform a tail call.
21873  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
21874  MVT::Glue)
21875  return false;
21876  TCChain = Copy->getOperand(0);
21877  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
21878  return false;
21879 
21880  bool HasRet = false;
21881  for (SDNode *Node : Copy->uses()) {
21882  if (Node->getOpcode() != AArch64ISD::RET_FLAG)
21883  return false;
21884  HasRet = true;
21885  }
21886 
21887  if (!HasRet)
21888  return false;
21889 
21890  Chain = TCChain;
21891  return true;
21892 }
21893 
21894 // Return whether the an instruction can potentially be optimized to a tail
21895 // call. This will cause the optimizers to attempt to move, or duplicate,
21896 // return instructions to help enable tail call optimizations for this
21897 // instruction.
21898 bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
21899  return CI->isTailCall();
21900 }
21901 
21902 bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
21903  SDValue &Base,
21904  SDValue &Offset,
21905  SelectionDAG &DAG) const {
21906  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21907  return false;
21908 
21909  // Non-null if there is exactly one user of the loaded value (ignoring chain).
21910  SDNode *ValOnlyUser = nullptr;
21911  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
21912  ++UI) {
21913  if (UI.getUse().getResNo() == 1)
21914  continue; // Ignore chain.
21915  if (ValOnlyUser == nullptr)
21916  ValOnlyUser = *UI;
21917  else {
21918  ValOnlyUser = nullptr; // Multiple non-chain uses, bail out.
21919  break;
21920  }
21921  }
21922 
21923  auto IsUndefOrZero = [](SDValue V) {
21924  return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
21925  };
21926 
21927  // If the only user of the value is a scalable vector splat, it is
21928  // preferable to do a replicating load (ld1r*).
21929  if (ValOnlyUser && ValOnlyUser->getValueType(0).isScalableVector() &&
21930  (ValOnlyUser->getOpcode() == ISD::SPLAT_VECTOR ||
21931  (ValOnlyUser->getOpcode() == AArch64ISD::DUP_MERGE_PASSTHRU &&
21932  IsUndefOrZero(ValOnlyUser->getOperand(2)))))
21933  return false;
21934 
21935  Base = Op->getOperand(0);
21936  // All of the indexed addressing mode instructions take a signed
21937  // 9 bit immediate offset.
21938  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21939  int64_t RHSC = RHS->getSExtValue();
21940  if (Op->getOpcode() == ISD::SUB)
21941  RHSC = -(uint64_t)RHSC;
21942  if (!isInt<9>(RHSC))
21943  return false;
21944  // Always emit pre-inc/post-inc addressing mode. Use negated constant offset
21945  // when dealing with subtraction.
21946  Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0));
21947  return true;
21948  }
21949  return false;
21950 }
21951 
21952 bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
21953  SDValue &Offset,
21954  ISD::MemIndexedMode &AM,
21955  SelectionDAG &DAG) const {
21956  EVT VT;
21957  SDValue Ptr;
21958  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21959  VT = LD->getMemoryVT();
21960  Ptr = LD->getBasePtr();
21961  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21962  VT = ST->getMemoryVT();
21963  Ptr = ST->getBasePtr();
21964  } else
21965  return false;
21966 
21967  if (!getIndexedAddressParts(N, Ptr.getNode(), Base, Offset, DAG))
21968  return false;
21969  AM = ISD::PRE_INC;
21970  return true;
21971 }
21972 
21973 bool AArch64TargetLowering::getPostIndexedAddressParts(
21974  SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
21975  ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
21976  EVT VT;
21977  SDValue Ptr;
21978  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21979  VT = LD->getMemoryVT();
21980  Ptr = LD->getBasePtr();
21981  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21982  VT = ST->getMemoryVT();
21983  Ptr = ST->getBasePtr();
21984  } else
21985  return false;
21986 
21987  if (!getIndexedAddressParts(N, Op, Base, Offset, DAG))
21988  return false;
21989  // Post-indexing updates the base, so it's not a valid transform
21990  // if that's not the same as the load's pointer.
21991  if (Ptr != Base)
21992  return false;
21993  AM = ISD::POST_INC;
21994  return true;
21995 }
21996 
// Custom result legalisation for BITCAST: handle illegal scalable-vector
// fp->int bitcasts via an SVE container type, and i16 <- f16/bf16 bitcasts
// through an f32 INSERT_SUBREG.
void AArch64TargetLowering::ReplaceBITCASTResults(
    // NOTE(review): the parameter-list line (presumably "SDNode *N,
    // SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {")
    // appears to have been lost in extraction.
  SDLoc DL(N);
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = Op.getValueType();

  if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
    assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
           "Expected fp->int bitcast!");

    // Bitcasting between unpacked vector types of different element counts is
    // not a NOP because the live elements are laid out differently.
    //                01234567
    // e.g. nxv2i32 = XX??XX??
    //      nxv4f16 = X?X?X?X?
    if (VT.getVectorElementCount() != SrcVT.getVectorElementCount())
      return;

    // Cast into a legal container type, then truncate back to the requested
    // (illegal) result type.
    SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
    return;
  }

  // Only i16 <- f16/bf16 needs the scalar path below.
  if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
    return;

  // Place the half-precision value in the low 16 bits of an f32 register,
  // bitcast that to i32, then truncate to the i16 result.
  Op = SDValue(
      DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
                         DAG.getUNDEF(MVT::i32), Op,
                         DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
      0);
  Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
22032 
// Turn a 256-bit ADD/FADD of a vector with its pairwise-swapped self
// (shuffle mask 1,0,3,2,...) into one AArch64 ADDP on the split halves,
// shuffled back so every lane holds its pairwise sum.
// NOTE(review): the leading signature line (presumably "static void
// ReplaceAddWithADDP(SDNode *N, SmallVectorImpl<SDValue> &Results,")
// appears to have been lost in extraction.
                               SelectionDAG &DAG,
                               const AArch64Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  // Only 256-bit vectors; FP needs reassociation permission, and f16 needs
  // full FP16 support.
  if (!VT.is256BitVector() ||
      (VT.getScalarType().isFloatingPoint() &&
       !N->getFlags().hasAllowReassociation()) ||
      (VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()))
    return;

  // Match add(X, shuffle(X, undef)) with the operands in either order.
  SDValue X = N->getOperand(0);
  auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1));
  if (!Shuf) {
    Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
    X = N->getOperand(1);
    if (!Shuf)
      return;
  }

  if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef())
    return;

  // Check the mask is 1,0,3,2,5,4,...
  ArrayRef<int> Mask = Shuf->getMask();
  for (int I = 0, E = Mask.size(); I < E; I++)
    if (Mask[I] != (I % 2 == 0 ? I + 1 : I - 1))
      return;

  // An ADDP of the two halves produces all the pairwise sums in one
  // half-width register.
  SDLoc DL(N);
  auto LoHi = DAG.SplitVector(X, DL);
  assert(LoHi.first.getValueType() == LoHi.second.getValueType());
  SDValue Addp = DAG.getNode(AArch64ISD::ADDP, N, LoHi.first.getValueType(),
                             LoHi.first, LoHi.second);

  // Shuffle the elements back into order.
  SmallVector<int> NMask;
  for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I < E; I++) {
    NMask.push_back(I);
    NMask.push_back(I);
  }
  Results.push_back(
      DAG.getVectorShuffle(VT, DL,
                           DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Addp,
                                       DAG.getUNDEF(LoHi.first.getValueType())),
                           DAG.getUNDEF(VT), NMask));
}
22079 
// Split a wide vector reduction: apply InterOp element-wise on the two
// halves of the operand, then AcrossOp across the remaining half-width
// vector, and record the result.
// NOTE(review): the leading signature line (presumably "static void
// ReplaceReductionResults(SDNode *N, SmallVectorImpl<SDValue> &Results,")
// appears to have been lost in extraction.
                                    SelectionDAG &DAG, unsigned InterOp,
                                    unsigned AcrossOp) {
  EVT LoVT, HiVT;
  SDValue Lo, Hi;
  SDLoc dl(N);
  // Halve the operand, combine the halves element-wise, then reduce across.
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
  SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
  SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
  Results.push_back(SplitVal);
}
22093 
22094 static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
22095  SDLoc DL(N);
22096  SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
22098  DAG.getNode(ISD::SRL, DL, MVT::i128, N,
22099  DAG.getConstant(64, DL, MVT::i64)));
22100  return std::make_pair(Lo, Hi);
22101 }
22102 
// Custom result legalisation for EXTRACT_SUBVECTOR on scalable integer
// vectors: lower a "take exactly one half" extract to UUNPKLO/UUNPKHI plus
// a truncate back to the requested element type.
void AArch64TargetLowering::ReplaceExtractSubVectorResults(
    // NOTE(review): the parameter-list line (presumably "SDNode *N,
    // SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {")
    // appears to have been lost in extraction.
  SDValue In = N->getOperand(0);
  EVT InVT = In.getValueType();

  // Common code will handle these just fine.
  if (!InVT.isScalableVector() || !InVT.isInteger())
    return;

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // The following checks bail if this is not a halving operation.

  ElementCount ResEC = VT.getVectorElementCount();

  if (InVT.getVectorElementCount() != (ResEC * 2))
    return;

  // Only constant indices selecting exactly the low or the high half.
  auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CIndex)
    return;

  unsigned Index = CIndex->getZExtValue();
  if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
    return;

  // UUNPKLO/UUNPKHI produce the chosen half with widened elements; truncate
  // back to the requested element type.
  unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
  EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());

  SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
}
22136 
// Create an even/odd pair of X registers holding integer value V.
// NOTE(review): the signature line (presumably "static SDValue
// createGPRPairNode(SelectionDAG &DAG, SDValue V) {", matching the calls
// "createGPRPairNode(DAG, N->getOperand(2))" below) appears to have been
// lost in extraction.
  SDLoc dl(V.getNode());
  // Low and high 64-bit halves of the (up to 128-bit) value.
  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
  SDValue VHi = DAG.getAnyExtOrTrunc(
    DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
    dl, MVT::i64);
  // Swap the halves on big-endian targets so each lands in the correct
  // subregister of the pair.
  if (DAG.getDataLayout().isBigEndian())
    std::swap (VLo, VHi);
  // Assemble a REG_SEQUENCE of the XSeqPairs register class with the two
  // halves placed in the sube64/subo64 subregisters.
  SDValue RegClass =
      DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
  SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
  SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
  return SDValue(
      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
22154 
// Lower a 128-bit ATOMIC_CMP_SWAP: use the LSE CASP family when available
// (or when outlining atomics), otherwise fall back to the CMP_SWAP_128
// pseudo instructions.
// NOTE(review): the leading signature lines (presumably "static void
// ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl<SDValue> &Results,")
// appear to have been lost in extraction.
                                       SelectionDAG &DAG,
                                       const AArch64Subtarget *Subtarget) {
  assert(N->getValueType(0) == MVT::i128 &&
         "AtomicCmpSwap on types less than 128 should be legal");

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
    // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
    // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
    SDValue Ops[] = {
        createGPRPairNode(DAG, N->getOperand(2)), // Compare value
        createGPRPairNode(DAG, N->getOperand(3)), // Store value
        N->getOperand(1), // Ptr
        N->getOperand(0), // Chain in
    };

    // Pick the CASP variant that matches the required memory ordering.
    // NOTE(review): the "case AtomicOrdering::..." labels of this switch
    // appear to have been lost in extraction.
    unsigned Opcode;
    switch (MemOp->getMergedOrdering()) {
      Opcode = AArch64::CASPX;
      break;
      Opcode = AArch64::CASPAX;
      break;
      Opcode = AArch64::CASPLX;
      break;
      Opcode = AArch64::CASPALX;
      break;
    default:
      llvm_unreachable("Unexpected ordering!");
    }

    MachineSDNode *CmpSwap = DAG.getMachineNode(
        Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
    DAG.setNodeMemRefs(CmpSwap, {MemOp});

    // Pull the two 64-bit halves back out of the untyped pair result and
    // recombine them into the i128 value.
    unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
    if (DAG.getDataLayout().isBigEndian())
      std::swap(SubReg1, SubReg2);
    SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
                                            SDValue(CmpSwap, 0));
    SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
                                            SDValue(CmpSwap, 0));
    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
    Results.push_back(SDValue(CmpSwap, 1)); // Chain out
    return;
  }

  // No LSE: use the ordering-specific CMP_SWAP_128 pseudo expansions.
  // NOTE(review): the "case AtomicOrdering::..." labels of this switch also
  // appear to have been lost in extraction.
  unsigned Opcode;
  switch (MemOp->getMergedOrdering()) {
    Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
    break;
    Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
    break;
    Opcode = AArch64::CMP_SWAP_128_RELEASE;
    break;
    Opcode = AArch64::CMP_SWAP_128;
    break;
  default:
    llvm_unreachable("Unexpected ordering!");
  }

  // The pseudos take the desired/new values split into i64 halves.
  auto Desired = splitInt128(N->getOperand(2), DAG);
  auto New = splitInt128(N->getOperand(3), DAG);
  SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
                   New.first, New.second, N->getOperand(0)};
  SDNode *CmpSwap = DAG.getMachineNode(
      Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
      Ops);
  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
                                SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
  Results.push_back(SDValue(CmpSwap, 3));
}
22241 
// Map a 128-bit atomicrmw SelectionDAG opcode plus a memory ordering to the
// corresponding LSE128 machine instruction (LDCLRP/LDSETP/SWPP family).
static unsigned getAtomicLoad128Opcode(unsigned ISDOpcode,
                                       AtomicOrdering Ordering) {
  // ATOMIC_LOAD_CLR only appears when lowering ATOMIC_LOAD_AND (see
  // LowerATOMIC_LOAD_AND). We can't take that approach with 128-bit, because
  // the type is not legal. Therefore we shouldn't expect to see a 128-bit
  // ATOMIC_LOAD_CLR at any point.
  assert(ISDOpcode != ISD::ATOMIC_LOAD_CLR &&
         "ATOMIC_LOAD_AND should be lowered to LDCLRP directly");
  assert(ISDOpcode != ISD::ATOMIC_LOAD_ADD && "There is no 128 bit LDADD");
  assert(ISDOpcode != ISD::ATOMIC_LOAD_SUB && "There is no 128 bit LDSUB");

  // NOTE(review): the "case AtomicOrdering::..." labels in the three
  // switches below appear to have been lost in extraction.
  if (ISDOpcode == ISD::ATOMIC_LOAD_AND) {
    // The operand will need to be XORed in a separate step.
    switch (Ordering) {
      return AArch64::LDCLRP;
      break;
      return AArch64::LDCLRPA;
      break;
      return AArch64::LDCLRPL;
      break;
      return AArch64::LDCLRPAL;
      break;
    default:
      llvm_unreachable("Unexpected ordering!");
    }
  }

  if (ISDOpcode == ISD::ATOMIC_LOAD_OR) {
    switch (Ordering) {
      return AArch64::LDSETP;
      break;
      return AArch64::LDSETPA;
      break;
      return AArch64::LDSETPL;
      break;
      return AArch64::LDSETPAL;
      break;
    default:
      llvm_unreachable("Unexpected ordering!");
    }
  }

  if (ISDOpcode == ISD::ATOMIC_SWAP) {
    switch (Ordering) {
      return AArch64::SWPP;
      break;
      return AArch64::SWPPA;
      break;
      return AArch64::SWPPL;
      break;
      return AArch64::SWPPAL;
      break;
    default:
      llvm_unreachable("Unexpected ordering!");
    }
  }

  llvm_unreachable("Unexpected ISDOpcode!");
}
22316 
// Lower a 128-bit atomicrmw (AND/OR/SWAP) to an LSE128 instruction, using
// TRUNCATE/BUILD_PAIR to move between i128 and the instruction's i64 halves.
// NOTE(review): the leading signature lines (presumably "static void
// ReplaceATOMIC_LOAD_128Results(SDNode *N, SmallVectorImpl<SDValue>
// &Results,") appear to have been lost in extraction.
                                          SelectionDAG &DAG,
                                          const AArch64Subtarget *Subtarget) {
  // LSE128 has a 128-bit RMW ops, but i128 is not a legal type, so lower it
  // here. This follows the approach of the CMP_SWAP_XXX pseudo instructions
  // rather than the CASP instructions, because CASP has register classes for
  // the pairs of registers and therefore uses REG_SEQUENCE and EXTRACT_SUBREG
  // to present them as single operands. LSE128 instructions use the GPR64
  // register class (because the pair does not have to be sequential), like
  // CMP_SWAP_XXX, and therefore we use TRUNCATE and BUILD_PAIR.

  assert(N->getValueType(0) == MVT::i128 &&
         "AtomicLoadXXX on types less than 128 should be legal");

  if (!Subtarget->hasLSE128())
    return;

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  const SDValue &Chain = N->getOperand(0);
  const SDValue &Ptr = N->getOperand(1);
  const SDValue &Val128 = N->getOperand(2);
  std::pair<SDValue, SDValue> Val2x64 = splitInt128(Val128, DAG);

  const unsigned ISDOpcode = N->getOpcode();
  const unsigned MachineOpcode =
      getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering());

  // LDCLRP clears the bits set in the operand, so AND is implemented by
  // clearing with the complement of the value.
  if (ISDOpcode == ISD::ATOMIC_LOAD_AND) {
    SDLoc dl(Val128);
    Val2x64.first =
        DAG.getNode(ISD::XOR, dl, MVT::i64,
                    DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.first);
    Val2x64.second =
        DAG.getNode(ISD::XOR, dl, MVT::i64,
                    DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.second);
  }

  SDValue Ops[] = {Val2x64.first, Val2x64.second, Ptr, Chain};
  // Big-endian targets need the operand halves (and, below, the result
  // halves) swapped.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(Ops[0], Ops[1]);

  MachineSDNode *AtomicInst =
      DAG.getMachineNode(MachineOpcode, SDLoc(N),
                         DAG.getVTList(MVT::i64, MVT::i64, MVT::Other), Ops);

  DAG.setNodeMemRefs(AtomicInst, {MemOp});

  SDValue Lo = SDValue(AtomicInst, 0), Hi = SDValue(AtomicInst, 1);
  if (DAG.getDataLayout().isBigEndian())
    std::swap(Lo, Hi);

  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
  Results.push_back(SDValue(AtomicInst, 2)); // Chain out
}
22372 
// Custom type legalisation entry point: replace the (illegal-typed) results
// of N with legal-typed equivalents pushed onto Results, dispatching on the
// node's opcode to the Replace* helpers above.
// NOTE(review): several lines of this function appear to have been lost in
// extraction (the parameter list, the ReplaceReductionResults call
// arguments for the *ADDV/*MINV/*MAXV cases, the STRICT_FP_TO_* case
// labels, the getMemIntrinsicNode calls and BUILD_PAIR declarations in the
// LOAD and READ_REGISTER cases, and the EXTRACT_SUBVECTOR case label).
void AArch64TargetLowering::ReplaceNodeResults(
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom expand this");
  case ISD::BITCAST:
    ReplaceBITCASTResults(N, Results, DAG);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
    Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
    return;
  case ISD::ADD:
  case ISD::FADD:
    ReplaceAddWithADDP(N, Results, DAG, Subtarget);
    return;

  case ISD::CTPOP:
  case ISD::PARITY:
    if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG))
      Results.push_back(Result);
    return;
  // Across-vector reductions: the ReplaceReductionResults call arguments
  // were lost in extraction for each of the following cases.
  case AArch64ISD::SADDV:
    return;
  case AArch64ISD::UADDV:
    return;
  case AArch64ISD::SMINV:
    return;
  case AArch64ISD::UMINV:
    return;
  case AArch64ISD::SMAXV:
    return;
  case AArch64ISD::UMAXV:
    return;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
    assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
    // Let normal code take care of it by not adding anything to Results.
    return;
  case ISD::ATOMIC_CMP_SWAP:
    ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
    return;
  case ISD::ATOMIC_LOAD_CLR:
    assert(N->getValueType(0) != MVT::i128 &&
           "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
    break;
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_SWAP: {
    assert(cast<AtomicSDNode>(N)->getVal().getValueType() == MVT::i128 &&
           "Expected 128-bit atomicrmw.");
    // These need custom type legalisation so we go directly to instruction.
    ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
    return;
  }
  case ISD::ATOMIC_LOAD:
  case ISD::LOAD: {
    MemSDNode *LoadNode = cast<MemSDNode>(N);
    EVT MemVT = LoadNode->getMemoryVT();
    // Handle lowering 256 bit non temporal loads into LDNP for little-endian
    // targets.
    if (LoadNode->isNonTemporal() && Subtarget->isLittleEndian() &&
        MemVT.getSizeInBits() == 256u &&
        (MemVT.getScalarSizeInBits() == 8u ||
         MemVT.getScalarSizeInBits() == 16u ||
         MemVT.getScalarSizeInBits() == 32u ||
         MemVT.getScalarSizeInBits() == 64u)) {

      // NOTE(review): the declaration of Result (a memory-intrinsic node
      // producing the two half vectors and a chain) was lost in extraction.
          DAG.getVTList({MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
                         MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
                         MVT::Other}),
          {LoadNode->getChain(), LoadNode->getBasePtr()},
          LoadNode->getMemoryVT(), LoadNode->getMemOperand());

      SDValue Pair = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), MemVT,
                                 Result.getValue(0), Result.getValue(1));
      Results.append({Pair, Result.getValue(2) /* Chain */});
      return;
    }

    if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
        LoadNode->getMemoryVT() != MVT::i128) {
      // Non-volatile or atomic loads are optimized later in AArch64's load/store
      // optimizer.
      return;
    }

    if (SDValue(N, 0).getValueType() == MVT::i128) {
      auto *AN = dyn_cast<AtomicSDNode>(LoadNode);
      // NOTE(review): the right-hand side of isLoadAcquire and the
      // declarations of Result/Pair below were lost in extraction.
      bool isLoadAcquire =
      unsigned Opcode = isLoadAcquire ? AArch64ISD::LDIAPP : AArch64ISD::LDP;

      if (isLoadAcquire)
        assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));

          Opcode, SDLoc(N), DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
          {LoadNode->getChain(), LoadNode->getBasePtr()},
          LoadNode->getMemoryVT(), LoadNode->getMemOperand());

          Result.getValue(0), Result.getValue(1));
      Results.append({Pair, Result.getValue(2) /* Chain */});
    }
    return;
  }
  // NOTE(review): the "case ISD::EXTRACT_SUBVECTOR:" label was lost in
  // extraction.
    ReplaceExtractSubVectorResults(N, Results, DAG);
    return;
  case ISD::INSERT_SUBVECTOR:
  case ISD::CONCAT_VECTORS:
    // Custom lowering has been requested for INSERT_SUBVECTOR and
    // CONCAT_VECTORS -- but delegate to common code for result type
    // legalisation
    return;
  case ISD::INTRINSIC_WO_CHAIN: {
    EVT VT = N->getValueType(0);
    assert((VT == MVT::i8 || VT == MVT::i16) &&
           "custom lowering for unexpected type");

    ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
    Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
    switch (IntID) {
    default:
      return;
    case Intrinsic::aarch64_sve_clasta_n: {
      SDLoc DL(N);
      // Widen the narrow scalar operand to i32, run the op there, then
      // truncate back (the same pattern for all four SVE cases below).
      auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
      auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
                           N->getOperand(1), Op2, N->getOperand(3));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
      return;
    }
    case Intrinsic::aarch64_sve_clastb_n: {
      SDLoc DL(N);
      auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
      auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
                           N->getOperand(1), Op2, N->getOperand(3));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
      return;
    }
    case Intrinsic::aarch64_sve_lasta: {
      SDLoc DL(N);
      auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
                           N->getOperand(1), N->getOperand(2));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
      return;
    }
    case Intrinsic::aarch64_sve_lastb: {
      SDLoc DL(N);
      auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
                           N->getOperand(1), N->getOperand(2));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
      return;
    }
    }
  }
  case ISD::READ_REGISTER: {
    SDLoc DL(N);
    assert(N->getValueType(0) == MVT::i128 &&
           "READ_REGISTER custom lowering is only for 128-bit sysregs");
    SDValue Chain = N->getOperand(0);
    SDValue SysRegName = N->getOperand(1);

    SDValue Result = DAG.getNode(
        AArch64ISD::MRRS, DL, DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
        Chain, SysRegName);

    // Sysregs are not endian. Result.getValue(0) always contains the lower half
    // of the 128-bit System Register value.
    // NOTE(review): the BUILD_PAIR declaration of Pair was lost in
    // extraction.
        Result.getValue(0), Result.getValue(1));
    Results.push_back(Pair);
    Results.push_back(Result.getValue(2)); // Chain
    return;
  }
  }
}
22565 
// NOTE(review): the signature line (likely "bool
// AArch64TargetLowering::useLoadStackGuardNode() const {") and the return
// statement taken on the Android/Fuchsia path appear to have been lost in
// extraction; all other targets answer true.
  if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
  return true;
}
22571 
22572 unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
22573  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
22574  // reciprocal if there are three or more FDIVs.
22575  return 3;
22576 }
22577 
// NOTE(review): the signature lines (likely
// "TargetLoweringBase::LegalizeTypeAction
// AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {") and the
// final fallback return appear to have been lost in extraction.
  // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
  // v4i16, v2i32 instead of to promote.
  if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
      VT == MVT::v1f32)
    return TypeWidenVector;

}
22588 
// In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
// provided the address is 16-byte aligned.
// NOTE(review): the signature line (likely "bool
// AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const
// {") appears to have been lost in extraction.
  if (!Subtarget->hasLSE2())
    return false;

  // Only 128-bit, at-least-16-byte-aligned loads and stores qualify.
  if (auto LI = dyn_cast<LoadInst>(I))
    return LI->getType()->getPrimitiveSizeInBits() == 128 &&
           LI->getAlign() >= Align(16);

  if (auto SI = dyn_cast<StoreInst>(I))
    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
           SI->getAlign() >= Align(16);

  return false;
}
22605 
// Decide whether a 128-bit store or atomicrmw can be implemented with an
// LSE128 instruction (SWPP and friends).
// NOTE(review): the signature line (likely "bool
// AArch64TargetLowering::isOpSuitableForLSE128(const Instruction *I) const
// {") appears to have been lost in extraction.
  if (!Subtarget->hasLSE128())
    return false;

  // Only use SWPP for stores where LSE2 would require a fence. Unlike STP, SWPP
  // will clobber the two registers.
  if (const auto *SI = dyn_cast<StoreInst>(I))
    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
           SI->getAlign() >= Align(16) &&
           (SI->getOrdering() == AtomicOrdering::Release ||
            SI->getOrdering() == AtomicOrdering::SequentiallyConsistent);

  // RMW ops qualify only for the operations LSE128 actually provides
  // (Xchg/And/Or).
  if (const auto *RMW = dyn_cast<AtomicRMWInst>(I))
    return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
           RMW->getAlign() >= Align(16) &&
           (RMW->getOperation() == AtomicRMWInst::Xchg ||
            RMW->getOperation() == AtomicRMWInst::And ||
            RMW->getOperation() == AtomicRMWInst::Or);

  return false;
}
22627 
// Decide whether a 128-bit acquire load / release store can use the RCPC3
// instructions.
// NOTE(review): the signature line (likely "bool
// AArch64TargetLowering::isOpSuitableForRCPC3(const Instruction *I) const
// {") appears to have been lost in extraction.
  if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
    return false;

  // 128-bit, 16-byte-aligned acquire loads...
  if (auto LI = dyn_cast<LoadInst>(I))
    return LI->getType()->getPrimitiveSizeInBits() == 128 &&
           LI->getAlign() >= Align(16) &&
           LI->getOrdering() == AtomicOrdering::Acquire;

  // ...and release stores of the same shape.
  if (auto SI = dyn_cast<StoreInst>(I))
    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
           SI->getAlign() >= Align(16) &&
           SI->getOrdering() == AtomicOrdering::Release;

  return false;
}
22644 
// Fences are needed only for atomics implemented with plain LDP/STP;
// RCPC3 and LSE128 instructions carry their own ordering semantics.
// NOTE(review): the signature line (likely "bool
// AArch64TargetLowering::shouldInsertFencesForAtomic(") appears to have
// been lost in extraction.
                                          const Instruction *I) const {
  if (isOpSuitableForRCPC3(I))
    return false;
  if (isOpSuitableForLSE128(I))
    return false;
  if (isOpSuitableForLDPSTP(I))
    return true;
  return false;
}
22655 
// NOTE(review): the signature line (likely "bool
// AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(") and the
// continuation lines completing the three ordering comparisons (each
// apparently "AtomicOrdering::SequentiallyConsistent;") appear to have been
// lost in extraction.
                                          const Instruction *I) const {
  // Store-Release instructions only provide seq_cst guarantees when paired with
  // Load-Acquire instructions. MSVC CRT does not use these instructions to
  // implement seq_cst loads and stores, so we need additional explicit fences
  // after memory writes.
  if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
    return false;

  // Only the seq_cst flavours of these three write operations need the
  // trailing fence.
  switch (I->getOpcode()) {
  default:
    return false;
  case Instruction::AtomicCmpXchg:
    return cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() ==
  case Instruction::AtomicRMW:
    return cast<AtomicRMWInst>(I)->getOrdering() ==
  case Instruction::Store:
    return cast<StoreInst>(I)->getOrdering() ==
  }
}
22679 
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
// NOTE(review): the signature lines and the return statements (the
// AtomicExpansionKind results for the non-128-bit, RCPC3, and fallback
// paths) of this function appear to have been lost in extraction.
  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
  if (Size != 128)
  if (isOpSuitableForRCPC3(SI))
}
22696 
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
// NOTE(review): the signature lines and the AtomicExpansionKind return
// statements for the non-128-bit, RCPC3, LDP/STP, -O0, and fallback paths
// appear to have been lost in extraction.
  unsigned Size = LI->getType()->getPrimitiveSizeInBits();

  if (Size != 128)
  if (isOpSuitableForRCPC3(LI))
  // No LSE128 loads
  if (isOpSuitableForLDPSTP(LI))

  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
  // implement atomicrmw without spilling. If the target address is also on the
  // stack and close enough to the spill slot, this can lead to a situation
  // where the monitor always gets cleared and the atomic operation can never
  // succeed. So at -O0 lower this operation to a CAS loop.
  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)

  // Using CAS for an atomic load has a better chance of succeeding under high
  // contention situations. So use it if available.
  return Subtarget->hasLSE() ? AtomicExpansionKind::CmpXChg
}
22725 
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
// NOTE(review): the rest of this comment, the signature lines, and several
// AtomicExpansionKind return statements of this function appear to have
// been lost in extraction.
  // Floating-point RMW operations are handled elsewhere.
  if (AI->isFloatingPointOperation())

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size > 128) return AtomicExpansionKind::None;

  // LSE128 covers 128-bit Xchg/Or/And directly.
  bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
                      (AI->getOperation() == AtomicRMWInst::Xchg ||
                       AI->getOperation() == AtomicRMWInst::Or ||
                       AI->getOperation() == AtomicRMWInst::And);
  if (CanUseLSE128)

  // Nand is not supported in LSE.
  // Leave 128 bits to LLSC or CmpXChg.
  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
    if (Subtarget->hasLSE())
    if (Subtarget->outlineAtomics()) {
      // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
      // Don't outline them unless
      // (1) high level <atomic> support approved:
      //   http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
      // (2) low level libgcc and compiler-rt support implemented by:
      //   min/max outline atomics helpers
      if (AI->getOperation() != AtomicRMWInst::Min &&
          AI->getOperation() != AtomicRMWInst::Max &&
          AI->getOperation() != AtomicRMWInst::UMin &&
          AI->getOperation() != AtomicRMWInst::UMax) {
      }
    }
  }

  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
  // implement atomicrmw without spilling. If the target address is also on the
  // stack and close enough to the spill slot, this can lead to a situation
  // where the monitor always gets cleared and the atomic operation can never
  // succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
  // we have a single CAS instruction that can replace the loop.
      Subtarget->hasLSE())

}
22775 
// Decide how (and whether) AtomicExpand should rewrite a cmpxchg.
// NOTE(review): the signature lines and the AtomicExpansionKind return
// statements for the LSE, -O0, >64-bit, and fallback paths appear to have
// been lost in extraction.
                                        AtomicCmpXchgInst *AI) const {
  // If subtarget has LSE, leave cmpxchg intact for codegen.
  if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
  // implement cmpxchg without spilling. If the address being exchanged is also
  // on the stack and close enough to the spill slot, this can lead to a
  // situation where the monitor always gets cleared and the atomic operation
  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)

  // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
  // it.
  unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size > 64)

}
22798 
// Emit a load-linked (ldxr/ldaxr, or ldxp/ldaxp for 128 bits) of Addr and
// return the loaded value as ValueTy.
// NOTE(review): the leading signature line (likely "Value
// *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,") and the
// declaration of Ldxr inside the 128-bit branch appear to have been lost in
// extraction.
                                             Type *ValueTy, Value *Addr,
                                             AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  // Acquire-or-stronger orderings use the acquire (lda*) intrinsic forms.
  bool IsAcquire = isAcquireOrStronger(Ord);

  // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
  // intrinsic must return {i64, i64} and we have to recombine them into a
  // single i128 here.
  if (ValueTy->getPrimitiveSizeInBits() == 128) {
    Intrinsic::ID Int =
        IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;

    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
    Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");

    // Reassemble the i128: zext each i64 half and OR in the shifted high
    // half.
    Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
    Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
    Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
    Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
    return Builder.CreateOr(
        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
  }

  Type *Tys[] = { Addr->getType() };
  Intrinsic::ID Int =
      IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
  Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);

  // The intrinsic returns an integer of ValueTy's width; record the pointee
  // type via the elementtype attribute and convert back to ValueTy.
  const DataLayout &DL = M->getDataLayout();
  IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
  CallInst *CI = Builder.CreateCall(Ldxr, Addr);
  CI->addParamAttr(
      0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
  Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);

  return Builder.CreateBitCast(Trunc, ValueTy);
}
22838 
// Emits CLREX to clear the exclusive monitor on the no-store path of a
// cmpxchg expansion, balancing the earlier load-exclusive.
// NOTE(review): the opening signature line is not visible in this excerpt.
    IRBuilderBase &Builder) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
22844 
// Emits a store-conditional (exclusive store) of Val to Addr, using
// STLXR/STXR (or the paired STLXP/STXP for 128-bit values) depending on the
// ordering. Returns the intrinsic's status result.
// NOTE(review): the opening signature line is not visible in this excerpt.
                                                 Value *Val, Value *Addr,
                                                 AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  // Release-or-stronger orderings select the releasing (STLXR/STLXP) variants.
  bool IsRelease = isReleaseOrStronger(Ord);

  // Since the intrinsics must have legal type, the i128 intrinsics take two
  // parameters: "i64, i64". We must marshal Val into the appropriate form
  // before the call.
  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
    Intrinsic::ID Int =
        IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
    // NOTE(review): the declaration of `Stxr` (Intrinsic::getDeclaration)
    // is elided in this excerpt.
    Type *Int64Ty = Type::getInt64Ty(M->getContext());

    // Split the i128 into its low and high i64 halves for the paired store.
    Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
    return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
  }

  Intrinsic::ID Int =
      IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
  Type *Tys[] = { Addr->getType() };
  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);

  // Reinterpret the value as an integer of equal width, then widen it to the
  // intrinsic's expected parameter type.
  const DataLayout &DL = M->getDataLayout();
  IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
  Val = Builder.CreateBitCast(Val, IntValTy);

  CallInst *CI = Builder.CreateCall(
      Stxr, {Builder.CreateZExtOrBitCast(
                 Val, Stxr->getFunctionType()->getParamType(0)),
             Addr});
  // Record the pointee type on the pointer argument (opaque-pointer support).
  CI->addParamAttr(1, Attribute::get(Builder.getContext(),
                                     Attribute::ElementType, Val->getType()));
  return CI;
}
22883 
// Decides whether an argument of type Ty must be assigned to consecutive
// registers.
// NOTE(review): the opening signature line is not visible in this excerpt.
    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
    const DataLayout &DL) const {
  if (!Ty->isArrayTy()) {
    // Non-array case: true only for scalable types wider than 128 bits
    // (i.e. larger than a single SVE register's minimum size).
    const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
    return TySize.isScalable() && TySize.getKnownMinValue() > 128;
  }

  // All non aggregate members of the type must have the same type
  SmallVector<EVT> ValueVTs;
  ComputeValueVTs(*this, DL, Ty, ValueVTs);
  return all_equal(ValueVTs);
}
22897 
// Never normalize selects into a select-of-logic sequence on AArch64;
// keeping the original select shape is preferred here (AArch64 lowers
// selects directly — see the CSEL handling elsewhere in this file).
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
                                                            EVT) const {
  return false;
}
22902 
22903 static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
22904  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
22905  Function *ThreadPointerFunc =
22906  Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
22907  return IRB.CreatePointerCast(
22908  IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
22909  Offset),
22910  IRB.getInt8PtrTy()->getPointerTo(0));
22911 }
22912 
// Returns the IR value of the stack guard (cookie) location. Android and
// Fuchsia keep it at a fixed offset from the thread pointer; all other
// targets defer to the generic TargetLowering implementation.
// NOTE(review): the opening signature line is not visible in this excerpt.
  // Android provides a fixed TLS slot for the stack cookie. See the definition
  // of TLS_SLOT_STACK_GUARD in
  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
  if (Subtarget->isTargetAndroid())
    return UseTlsOffset(IRB, 0x28);

  // Fuchsia is similar.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
  if (Subtarget->isTargetFuchsia())
    return UseTlsOffset(IRB, -0x10);

  return TargetLowering::getIRStackGuard(IRB);
}
22927 
// Declares the module-level symbols needed for stack-smashing protection.
// NOTE(review): the opening signature line and the trailing fallback call to
// the base-class implementation are not visible in this excerpt.
  // MSVC CRT provides functionalities for stack protection.
  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
    // MSVC CRT has a global variable holding security cookie.
    M.getOrInsertGlobal("__security_cookie",
                        Type::getInt8PtrTy(M.getContext()));

    // MSVC CRT has a function to validate security cookie.
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        Subtarget->getSecurityCheckCookieName(),
        Type::getVoidTy(M.getContext()), Type::getInt8PtrTy(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      // The check function uses the Win64 convention with its argument in a
      // register.
      F->setCallingConv(CallingConv::Win64);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
    }
    return;
  }
}
22947 
// Returns the stack-guard global for SelectionDAG lowering (MSVC's
// __security_cookie on Windows/MSVC environments).
// NOTE(review): the opening signature line and the non-MSVC fallback return
// are not visible in this excerpt.
  // MSVC CRT has a global variable holding security cookie.
  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
    return M.getGlobalVariable("__security_cookie");
}
22954 
// Returns the cookie-validation function for stack protection (MSVC only).
// NOTE(review): the opening signature line and the non-MSVC fallback return
// are not visible in this excerpt.
  // MSVC CRT has a function to validate security cookie.
  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
    return M.getFunction(Subtarget->getSecurityCheckCookieName());
}
22961 
// Returns the location of the SafeStack unsafe-stack pointer. Android and
// Fuchsia keep it at a fixed offset from the thread pointer.
// NOTE(review): the rest of the signature line and the trailing fallback to
// the base-class implementation are not visible in this excerpt.
Value *
  // Android provides a fixed TLS slot for the SafeStack pointer. See the
  // definition of TLS_SLOT_SAFESTACK in
  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
  if (Subtarget->isTargetAndroid())
    return UseTlsOffset(IRB, 0x48);

  // Fuchsia is similar.
  // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
  if (Subtarget->isTargetFuchsia())
    return UseTlsOffset(IRB, -0x8);

}
22977 
// Returns true when sinking the 'and' of AndI next to its cmp user is
// beneficial: only single-bit masks qualify, since those fold into TBZ/TBNZ.
// NOTE(review): the opening signature line is not visible in this excerpt.
    const Instruction &AndI) const {
  // Only sink 'and' mask to cmp use block if it is masking a single bit, since
  // this is likely to fold the and/cmp/br into a single tbz instruction. It
  // may be beneficial to sink in other cases, but we would have to check that
  // the cmp would not get folded into the br to form a cbz for these to be
  // beneficial.
  ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
  if (!Mask)
    return false;
  return Mask->getValue().isPowerOf2();
}
22990 
// Decides whether to hoist a constant out of a shifted LHS of an 'and'.
// NOTE(review): the first signature lines and the baseline-query call that
// precedes the visible `X, XC, ...` argument list are not visible in this
// excerpt.
    unsigned OldShiftOpcode, unsigned NewShiftOpcode,
    SelectionDAG &DAG) const {
  // Does baseline recommend not to perform the fold by default?
          X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
    return false;
  // Else, if this is a vector shift, prefer 'shl'.
  return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
}
23003 
// Picks the legalization strategy for wide shifts. At minsize on non-Windows,
// non-Darwin targets a libcall is presumably preferred.
// NOTE(review): the opening signature line and the two return statements are
// not visible in this excerpt — confirm against the full source.
    SelectionDAG &DAG, SDNode *N, unsigned int ExpansionFactor) const {
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
                                                         ExpansionFactor);
}
23013 
// Marks the function as using split callee-saved-register handling.
// NOTE(review): the opening signature line is not visible in this excerpt.
  // Update IsSplitCSR in AArch64FunctionInfo.
  AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
  AFI->setIsSplitCSR(true);
}
23019 
// For split-CSR functions, copies each CSR-via-copy register into a fresh
// virtual register at function entry and copies it back before each exit's
// terminator, instead of spilling/restoring it in the prologue/epilogue.
// NOTE(review): the opening signature line is not visible in this excerpt.
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  // No CSRs handled via copy for this calling convention: nothing to do.
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    // Only 64-bit GPRs and 64-bit FPRs are expected in this list.
    const TargetRegisterClass *RC = nullptr;
    if (AArch64::GPR64RegClass.contains(*I))
      RC = &AArch64::GPR64RegClass;
    else if (AArch64::FPR64RegClass.contains(*I))
      RC = &AArch64::FPR64RegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    Register NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction().hasFnAttribute(
               Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
23060 
// Returns true when a scalar integer division should be kept as a div
// instruction (only when minimizing size, and never for vectors).
// NOTE(review): the opening signature line is not visible in this excerpt.
  // Integer division on AArch64 is expensive. However, when aggressively
  // optimizing for code size, we prefer to use a div instruction, as it is
  // usually smaller than the alternative sequence.
  // The exception to this is vector division. Since AArch64 doesn't have vector
  // integer division, leaving the division as-is is a loss even in terms of
  // size, because it will have to be scalarized, while the alternative code
  // sequence can be performed in vector form.
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}
23072 
// Scalar integers prefer the inc-of-add form; vectors prefer sub-of-not.
// NOTE(review): the opening signature line is not visible in this excerpt.
  // We want inc-of-add for scalars and sub-of-not for vectors.
  return VT.isScalarInteger();
}
23077 
// Gates the fptosi/fptoui -> saturating-conversion fold.
// NOTE(review): the opening signature line is not visible in this excerpt.
                                                 EVT VT) const {
  // v8f16 without fp16 need to be extended to v8f32, which is more difficult to
  // legalize.
  if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
    return false;
  return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
}
23086 
// Enable aggressive FMA fusion only for floating-point types on subtargets
// that request it.
// NOTE(review): the opening signature line is not visible in this excerpt.
  return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
23090 
// Size of the va_list object in bits: a single pointer on Darwin/Windows,
// otherwise the AAPCS64 structure (three pointers plus two 32-bit offsets).
// NOTE(review): the rest of the signature line is not visible in this excerpt.
unsigned
  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
    return getPointerTy(DL).getSizeInBits();

  return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
}
23098 
// Final per-function fixups after ISel lowering: adjust the stack protector's
// placement relative to SVE locals and precompute the max call frame size.
void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // If we have any vulnerable SVE stack objects then the stack protector
  // needs to be placed at the top of the SVE stack area, as the SVE locals
  // are placed above the other locals, so we allocate it as if it were a
  // scalable vector.
  // FIXME: It may be worthwhile having a specific interface for this rather
  // than doing it here in finalizeLowering.
  if (MFI.hasStackProtectorIndex()) {
    // NOTE(review): the loop body that inspects each frame object is elided
    // in this excerpt.
    for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
        break;
      }
    }
  }
  MFI.computeMaxCallFrameSize(MF);
}
23121 
// Unlike X86, we let frame lowering assign offsets to all catch objects.
// NOTE(review): the opening signature line is not visible in this excerpt.
  return false;
}
23126 
// GlobalISel hook: decides whether an instruction's result should be
// localized (sunk next to its users) rather than hoisted/shared.
// NOTE(review): several interior lines (the RematCost==1 return, the imm-cost
// computation feeding `Cost`, and the default fall-through return) are not
// visible in this excerpt.
bool AArch64TargetLowering::shouldLocalize(
    const MachineInstr &MI, const TargetTransformInfo *TTI) const {
  auto &MF = *MI.getMF();
  auto &MRI = MF.getRegInfo();
  // Maps a rematerialization cost to the maximum number of users for which
  // localizing is still profitable.
  auto maxUses = [](unsigned RematCost) {
    // A cost of 1 means remats are basically free.
    if (RematCost == 1)
    if (RematCost == 2)
      return 2U;

    // Remat is too expensive, only sink if there's one user.
    if (RematCost > 2)
      return 1U;
    llvm_unreachable("Unexpected remat cost");
  };

  switch (MI.getOpcode()) {
  case TargetOpcode::G_GLOBAL_VALUE: {
    // On Darwin, TLS global vars get selected into function calls, which
    // we don't want localized, as they can get moved into the middle of a
    // another call sequence.
    const GlobalValue &GV = *MI.getOperand(1).getGlobal();
    if (GV.isThreadLocal() && Subtarget->isTargetMachO())
      return false;
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    auto *CI = MI.getOperand(1).getCImm();
    APInt Imm = CI->getValue();
    assert(Cost.isValid() && "Expected a valid imm cost");

    unsigned RematCost = *Cost.getValue();
    Register Reg = MI.getOperand(0).getReg();
    unsigned MaxUses = maxUses(RematCost);
    // Don't pass UINT_MAX sentinel value to hasAtMostUserInstrs().
    if (MaxUses == std::numeric_limits<unsigned>::max())
      --MaxUses;
    return MRI.hasAtMostUserInstrs(Reg, MaxUses);
  }
  // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
  // localizable.
  case AArch64::ADRP:
  case AArch64::G_ADD_LOW:
    return true;
  default:
    break;
  }
}
23179 
// Returns true for instructions GlobalISel cannot handle yet: anything
// producing, consuming, or allocating scalable vectors, and calls that need
// SME streaming-mode transitions or lazy ZA saves.
// NOTE(review): the opening signature line is not visible in this excerpt —
// presumably this is the GlobalISel fallback predicate; confirm against the
// full source.
  if (isa<ScalableVectorType>(Inst.getType()))
    return true;

  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
    if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
      return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (isa<ScalableVectorType>(AI->getAllocatedType()))
      return true;
  }

  // Checks to allow the use of SME instructions
  if (auto *Base = dyn_cast<CallBase>(&Inst)) {
    auto CallerAttrs = SMEAttrs(*Inst.getFunction());
    auto CalleeAttrs = SMEAttrs(*Base);
    // Calls that change streaming mode or require a lazy ZA save must be
    // handled by SelectionDAG.
    if (CallerAttrs.requiresSMChange(CalleeAttrs,
                                     /*BodyOverridesInterface=*/false) ||
        CallerAttrs.requiresLazySave(CalleeAttrs))
      return true;
  }
  return false;
}
23204 
// Return the largest legal scalable vector type that matches VT's element type.
// NOTE(review): the opening signature line is not visible in this excerpt.
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");
  // Map the element type to the full-register SVE container (nxv16i8 etc.).
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for SVE container");
  case MVT::i8:
    return EVT(MVT::nxv16i8);
  case MVT::i16:
    return EVT(MVT::nxv8i16);
  case MVT::i32:
    return EVT(MVT::nxv4i32);
  case MVT::i64:
    return EVT(MVT::nxv2i64);
  case MVT::f16:
    return EVT(MVT::nxv8f16);
  case MVT::f32:
    return EVT(MVT::nxv4f32);
  case MVT::f64:
    return EVT(MVT::nxv2f64);
  }
}
23229 
// Return a PTRUE with active lanes corresponding to the extent of VT.
// NOTE(review): the opening signature line and the call computing PgPattern
// (presumably from VT's element count) are not visible in this excerpt.
                                             EVT VT) {
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  std::optional<unsigned> PgPattern =
  assert(PgPattern && "Unexpected element count for SVE predicate");

  // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
  // AArch64SVEPredPattern::all, which can enable the use of unpredicated
  // variants of instructions when available.
  const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
  unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
  unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
  if (MaxSVESize && MinSVESize == MaxSVESize &&
      MaxSVESize == VT.getSizeInBits())
    PgPattern = AArch64SVEPredPattern::all;

  // Pick the predicate type whose lane count matches VT's element width.
  MVT MaskVT;
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for SVE predicate");
  case MVT::i8:
    MaskVT = MVT::nxv16i1;
    break;
  case MVT::i16:
  case MVT::f16:
    MaskVT = MVT::nxv8i1;
    break;
  case MVT::i32:
  case MVT::f32:
    MaskVT = MVT::nxv4i1;
    break;
  case MVT::i64:
  case MVT::f64:
    MaskVT = MVT::nxv2i1;
    break;
  }

  return getPTrue(DAG, DL, MaskVT, *PgPattern);
}
23274 
// Return an all-active predicate matching a scalable vector type VT.
// NOTE(review): the opening signature line and the first half of the assert
// are not visible in this excerpt.
                                             EVT VT) {
         "Expected legal scalable vector!");
  auto PredTy = VT.changeVectorElementType(MVT::i1);
  return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
}
23282 
// Dispatch to the fixed-length or scalable predicate helper based on VT.
// NOTE(review): the opening signature line is not visible in this excerpt.
  if (VT.isFixedLengthVector())
    return getPredicateForFixedLengthVector(DAG, DL, VT);

  return getPredicateForScalableVector(DAG, DL, VT);
}
23289 
// Grow V to consume an entire SVE register.
// NOTE(review): the opening signature line and part of the second assert are
// not visible in this excerpt.
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
  // Insert the fixed vector at lane 0 of an undef scalable vector.
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
23300 
// Shrink V so it's just big enough to maintain a VT's worth of data.
// NOTE(review): the opening signature line and part of the second assert are
// not visible in this excerpt.
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
  // Extract the low VT-sized subvector from the scalable register.
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
23311 
// Convert all fixed length vector loads larger than NEON to masked_loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  auto Load = cast<LoadSDNode>(Op);

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
  EVT LoadVT = ContainerVT;
  EVT MemVT = Load->getMemoryVT();

  // Predicate covering exactly VT's lanes.
  auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);

  // FP loads are performed as integer loads of the same width and cast back.
  if (VT.isFloatingPoint()) {
    LoadVT = ContainerVT.changeTypeToInteger();
    MemVT = MemVT.changeTypeToInteger();
  }

  SDValue NewLoad = DAG.getMaskedLoad(
      LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
      DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
      Load->getAddressingMode(), Load->getExtensionType());

  SDValue Result = NewLoad;
  if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
    // FP extending load: reinterpret at the memory element width, then extend.
    EVT ExtendVT = ContainerVT.changeVectorElementType(
        Load->getMemoryVT().getVectorElementType());

    Result = getSVESafeBitCast(ExtendVT, Result, DAG);
    // NOTE(review): the line creating the FP-extend node is elided in this
    // excerpt; only its trailing operand list is visible below.
                         Pg, Result, DAG.getUNDEF(ContainerVT));
  } else if (VT.isFloatingPoint()) {
    Result = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Result);
  }

  Result = convertFromScalableVector(DAG, VT, Result);
  // Return both the loaded value and the chain.
  SDValue MergedValues[2] = {Result, NewLoad.getValue(1)};
  return DAG.getMergeValues(MergedValues, DL);
}
23351 
// Convert a fixed-length vector mask into an SVE predicate by comparing the
// widened mask against zero.
// NOTE(review): the opening signature line and the node-creation line whose
// operand list appears at the end are not visible in this excerpt.
                                              SelectionDAG &DAG) {
  SDLoc DL(Mask);
  EVT InVT = Mask.getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);

  auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);

  // An all-ones mask is just the lane predicate itself.
  if (ISD::isBuildVectorAllOnes(Mask.getNode()))
    return Pg;

  auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
  auto Op2 = DAG.getConstant(0, DL, ContainerVT);

                     {Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
}
23369 
// Convert fixed length vector masked loads larger than NEON to SVE
// masked_loads over the scalable container type.
SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  auto Load = cast<MaskedLoadSDNode>(Op);

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

  // NOTE(review): the line converting the fixed mask to a scalable `Mask` is
  // elided in this excerpt.

  SDValue PassThru;
  bool IsPassThruZeroOrUndef = false;

  if (Load->getPassThru()->isUndef()) {
    PassThru = DAG.getUNDEF(ContainerVT);
    IsPassThruZeroOrUndef = true;
  } else {
    // Load with a zero passthru; a non-trivial passthru is reapplied below
    // with a select.
    if (ContainerVT.isInteger())
      PassThru = DAG.getConstant(0, DL, ContainerVT);
    else
      PassThru = DAG.getConstantFP(0, DL, ContainerVT);
    if (isZerosVector(Load->getPassThru().getNode()))
      IsPassThruZeroOrUndef = true;
  }

  SDValue NewLoad = DAG.getMaskedLoad(
      ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
      Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
      Load->getAddressingMode(), Load->getExtensionType());

  SDValue Result = NewLoad;
  if (!IsPassThruZeroOrUndef) {
    // Merge the original passthru back into the inactive lanes.
    SDValue OldPassThru =
        convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
    Result = DAG.getSelect(DL, ContainerVT, Mask, Result, OldPassThru);
  }

  Result = convertFromScalableVector(DAG, VT, Result);
  SDValue MergedValues[2] = {Result, NewLoad.getValue(1)};
  return DAG.getMergeValues(MergedValues, DL);
}
23412 
23413 // Convert all fixed length vector stores larger than NEON to masked_stores.
23414 SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
23415  SDValue Op, SelectionDAG &DAG) const {
23416  auto Store = cast<StoreSDNode>(Op);
23417 
23418  SDLoc DL(Op);
23419  EVT VT = Store->getValue().getValueType();
23420  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
23421  EVT MemVT = Store->getMemoryVT();
23422 
23423  auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
23424  auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
23425 
23426  if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
23427  EVT TruncVT = ContainerVT.changeVectorElementType(
23428  Store->getMemoryVT().getVectorElementType());
23429  MemVT = MemVT.changeTypeToInteger();
23430  NewValue = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, TruncVT, Pg,
23431  NewValue, DAG.getTargetConstant(0, DL, MVT::i64),
23432  DAG.getUNDEF(TruncVT));
23433  NewValue =
23434  getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG);
23435  } else if (VT.isFloatingPoint()) {
23436  MemVT = MemVT.changeTypeToInteger();
23437  NewValue =
23438  getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG);
23439  }
23440 
23441  return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
23442  Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
23443  Store->getMemOperand(), Store->getAddressingMode(),
23444  Store->isTruncatingStore());
23445 }
23446 
// Lower a fixed length masked store to an SVE masked_store over the scalable
// container type.
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  auto *Store = cast<MaskedStoreSDNode>(Op);

  SDLoc DL(Op);
  EVT VT = Store->getValue().getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

  auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
  // NOTE(review): the line converting the fixed mask to a scalable `Mask` is
  // elided in this excerpt.

  return DAG.getMaskedStore(
      Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
      Mask, Store->getMemoryVT(), Store->getMemOperand(),
      Store->getAddressingMode(), Store->isTruncatingStore());
}
23463 
23464 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
23465  SDValue Op, SelectionDAG &DAG) const {
23466  SDLoc dl(Op);
23467  EVT VT = Op.getValueType();
23468  EVT EltVT = VT.getVectorElementType();
23469 
23470  bool Signed = Op.getOpcode() == ISD::SDIV;
23471  unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
23472 
23473  bool Negated;
23474  uint64_t SplatVal;
23475  if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
23476  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
23477  SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
23478  SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32);
23479 
23480  SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT);
23481  SDValue Res =
23482  DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2);
23483  if (Negated)
23484  Res = DAG.getNode(ISD::SUB, dl, ContainerVT,
23485  DAG.getConstant(0, dl, ContainerVT), Res);
23486 
23487  return convertFromScalableVector(DAG, VT, Res);
23488  }
23489 
23490  // Scalable vector i32/i64 DIV is supported.
23491  if (EltVT == MVT::i32 || EltVT == MVT::i64)
23492  return LowerToPredicatedOp(Op, DAG, PredOpcode);
23493 
23494  // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
23495  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
23496  EVT PromVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
23497  unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
23498 
23499  // If the wider type is legal: extend, op, and truncate.
23500  EVT WideVT = VT.widenIntegerVectorElementType(*DAG.getContext());
23501  if (DAG.getTargetLoweringInfo().isTypeLegal(WideVT)) {
23502  SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WideVT, Op.getOperand(0));
23503  SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WideVT, Op.getOperand(1));
23504  SDValue Div = DAG.getNode(Op.getOpcode(), dl, WideVT, Op0, Op1);
23505  return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
23506  }
23507 
23508  auto HalveAndExtendVector = [&DAG, &dl, &HalfVT, &PromVT,
23509  &ExtendOpcode](SDValue Op) {
23510  SDValue IdxZero = DAG.getConstant(0, dl, MVT::i64);
23511  SDValue IdxHalf =
23512  DAG.getConstant(HalfVT.getVectorNumElements(), dl, MVT::i64);
23513  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, Op, IdxZero);
23514  SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, Op, IdxHalf);
23515  return std::pair<SDValue, SDValue>(
23516  {DAG.getNode(ExtendOpcode, dl, PromVT, Lo),
23517  DAG.getNode(ExtendOpcode, dl, PromVT, Hi)});
23518  };
23519 
23520  // If wider type is not legal: split, extend, op, trunc and concat.
23521  auto [Op0LoExt, Op0HiExt] = HalveAndExtendVector(Op.getOperand(0));
23522  auto [Op1LoExt, Op1HiExt] = HalveAndExtendVector(Op.getOperand(1));
23523  SDValue Lo = DAG.getNode(Op.getOpcode(), dl, PromVT, Op0LoExt, Op1LoExt);
23524  SDValue Hi = DAG.getNode(Op.getOpcode(), dl, PromVT, Op0HiExt, Op1HiExt);
23525  SDValue LoTrunc = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Lo);
23526  SDValue HiTrunc = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Hi);
23527  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, {LoTrunc, HiTrunc});
23528 }
23529 
23530 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
23531  SDValue Op, SelectionDAG &DAG) const {
23532  EVT VT = Op.getValueType();
23533  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
23534 
23535  SDLoc DL(Op);
23536  SDValue Val = Op.getOperand(0);
23537  EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
23538  Val = convertToScalableVector(DAG, ContainerVT, Val);
23539 
23540  bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
23541  unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
23542 
23543  // Repeatedly unpack Val until the result is of the desired element type.
23544  switch (ContainerVT.getSimpleVT().SimpleTy) {
23545  default:
23546  llvm_unreachable("unimplemented container type");
23547  case MVT::nxv16i8:
23548  Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
23549  if (VT.getVectorElementType() == MVT::i16)
23550  break;
23551  [[fallthrough]];
23552  case MVT::nxv8i16:
23553  Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
23554  if (VT.getVectorElementType() == MVT::i32)
23555  break;
23556  [[fallthrough]];
23557  case MVT::nxv4i32:
23558  Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
23559  assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
23560  break;
23561  }
23562 
23563  return convertFromScalableVector(DAG, VT, Val);
23564 }
23565 
23566 SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
23567  SDValue Op, SelectionDAG &DAG) const {
23568  EVT VT = Op.getValueType();
23569  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
23570 
23571  SDLoc DL(Op);
23572  SDValue Val = Op.getOperand(0);
23573  EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
23574  Val = convertToScalableVector(DAG, ContainerVT, Val);
23575 
23576  // Repeatedly truncate Val until the result is of the desired element type.
23577  switch (ContainerVT.getSimpleVT().SimpleTy) {
23578  default:
23579  llvm_unreachable("unimplemented container type");
23580  case MVT::nxv2i64:
23581  Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
23582  Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
23583  if (VT.getVectorElementType() == MVT::i32)
23584  break;
23585  [[fallthrough]];
23586  case MVT::nxv4i32:
23587  Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
23588  Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
23589  if (VT.getVectorElementType() == MVT::i16)
23590  break;
23591  [[fallthrough]];
23592  case MVT::nxv8i16:
23593  Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
23594  Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
23595  assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
23596  break;
23597  }
23598 
23599  return convertFromScalableVector(DAG, VT, Val);
23600 }
23601 
23602 SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
23603  SDValue Op, SelectionDAG &DAG) const {
23604  EVT VT = Op.getValueType();
23605  EVT InVT = Op.getOperand(0).getValueType();
23606  assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
23607 
23608  SDLoc DL(Op);
23609  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
23610  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
23611 
23612  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
23613 }
23614 
23615 SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
23616  SDValue Op, SelectionDAG &DAG) const {
23617  EVT VT = Op.getValueType();
23618  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
23619 
23620  SDLoc DL(Op);
23621  EVT InVT = Op.getOperand(0).getValueType();
23622  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
23623  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
23624 
23625  auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
23626  Op.getOperand(1), Op.getOperand(2));
23627 
23628  return convertFromScalableVector(DAG, VT, ScalableRes);
23629 }
23630 
// Convert vector operation 'Op' to an equivalent predicated operation whereby
// the original operation's type is used to construct a suitable predicate.
// NOTE: The results for inactive lanes are undefined.
// NOTE(review): the two `Operands` SmallVector declarations and one assert
// are elided in this excerpt.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned NewOp) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  auto Pg = getPredicateForVector(DAG, DL, VT);

  if (VT.isFixedLengthVector()) {
    assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

    // Create list of operands by converting existing ones to scalable types.
    for (const SDValue &V : Op->op_values()) {
      // Condition codes pass through unchanged.
      if (isa<CondCodeSDNode>(V)) {
        Operands.push_back(V);
        continue;
      }

      // VT operands are rebuilt over the scalable container's element type.
      if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
        EVT VTArg = VTNode->getVT().getVectorElementType();
        EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
        Operands.push_back(DAG.getValueType(NewVTArg));
        continue;
      }

             "Expected only legal fixed-width types");
      Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
    }

    // Merge-passthru opcodes take a trailing passthru operand; inactive lanes
    // are undefined here, so pass UNDEF.
    if (isMergePassthruOpcode(NewOp))
      Operands.push_back(DAG.getUNDEF(ContainerVT));

    auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
    return convertFromScalableVector(DAG, VT, ScalableRes);
  }

  assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");

  for (const SDValue &V : Op->op_values()) {
    assert((!V.getValueType().isVector() ||
            V.getValueType().isScalableVector()) &&
           "Only scalable vectors are supported!");
    Operands.push_back(V);
  }

  if (isMergePassthruOpcode(NewOp))
    Operands.push_back(DAG.getUNDEF(VT));

  return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
}
23687 
23688 // If a fixed length vector operation has no side effects when applied to
23689 // undefined elements, we can safely use scalable vectors to perform the same
23690 // operation without needing to worry about predication.
23691 SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
23692  SelectionDAG &DAG) const {
23693  EVT VT = Op.getValueType();
23694  assert(VT.isFixedLengthVector() && isTypeLegal(VT) &&
23695  "Only expected to lower fixed length vector operation!");
23696  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
23697 
23698  // Create list of operands by converting existing ones to scalable types.
23700  for (const SDValue &V : Op->op_values()) {
23701  assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
23702 
23703  // Pass through non-vector operands.
23704  if (!V.getValueType().isVector()) {
23705  Ops.push_back(V);
23706  continue;
23707  }
23708 
23709  // "cast" fixed length vector to a scalable vector.
23711  isTypeLegal(V.getValueType()) &&
23712  "Only fixed length vectors are supported!");
23713  Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
23714  }
23715 
23716  auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
23717  return convertFromScalableVector(DAG, VT, ScalableRes);
23718 }
23719 
23720 SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
23721  SelectionDAG &DAG) const {
23722  SDLoc DL(ScalarOp);
23723  SDValue AccOp = ScalarOp.getOperand(0);
23724  SDValue VecOp = ScalarOp.getOperand(1);
23725  EVT SrcVT = VecOp.getValueType();
23726  EVT ResVT = SrcVT.getVectorElementType();
23727 
23728  EVT ContainerVT = SrcVT;
23729  if (SrcVT.isFixedLengthVector()) {
23730  ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
23731  VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
23732  }
23733 
23734  SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
23735  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
23736 
23737  // Convert operands to Scalable.
23738  AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
23739  DAG.getUNDEF(ContainerVT), AccOp, Zero);
23740 
23741  // Perform reduction.
23742  SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
23743  Pg, AccOp, VecOp);
23744 
23745  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
23746 }
23747 
23748 SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
23749  SelectionDAG &DAG) const {
23750  SDLoc DL(ReduceOp);
23751  SDValue Op = ReduceOp.getOperand(0);
23752  EVT OpVT = Op.getValueType();
23753  EVT VT = ReduceOp.getValueType();
23754 
23755  if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
23756  return SDValue();
23757 
23758  SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
23759 
23760  switch (ReduceOp.getOpcode()) {
23761  default:
23762  return SDValue();
23763  case ISD::VECREDUCE_OR:
23764  if (isAllActivePredicate(DAG, Pg) && OpVT == MVT::nxv16i1)
23765  // The predicate can be 'Op' because
23766  // vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
23767  return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
23768  else
23769  return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
23770  case ISD::VECREDUCE_AND: {
23771  Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
23772  return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
23773  }
23774  case ISD::VECREDUCE_XOR: {
23775  SDValue ID =
23776  DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
23777  if (OpVT == MVT::nxv1i1) {
23778  // Emulate a CNTP on .Q using .D and a different governing predicate.
23781  }
23782  SDValue Cntp =
23784  return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
23785  }
23786  }
23787 
23788  return SDValue();
23789 }
23790 
23791 SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
23792  SDValue ScalarOp,
23793  SelectionDAG &DAG) const {
23794  SDLoc DL(ScalarOp);
23795  SDValue VecOp = ScalarOp.getOperand(0);
23796  EVT SrcVT = VecOp.getValueType();
23797 
23799  SrcVT,
23800  /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
23801  EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
23802  VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
23803  }
23804 
23805  // UADDV always returns an i64 result.
23806  EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
23807  SrcVT.getVectorElementType();
23808  EVT RdxVT = SrcVT;
23809  if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
23810  RdxVT = getPackedSVEVectorVT(ResVT);
23811 
23812  SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
23813  SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
23814  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
23815  Rdx, DAG.getConstant(0, DL, MVT::i64));
23816 
23817  // The VEC_REDUCE nodes expect an element size result.
23818  if (ResVT != ScalarOp.getValueType())
23819  Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
23820 
23821  return Res;
23822 }
23823 
23824 SDValue
23825 AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
23826  SelectionDAG &DAG) const {
23827  EVT VT = Op.getValueType();
23828  SDLoc DL(Op);
23829 
23830  EVT InVT = Op.getOperand(1).getValueType();
23831  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
23832  SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
23833  SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
23834 
23835  // Convert the mask to a predicated (NOTE: We don't need to worry about
23836  // inactive lanes since VSELECT is safe when given undefined elements).
23837  EVT MaskVT = Op.getOperand(0).getValueType();
23838  EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
23839  auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
23840  Mask = DAG.getNode(ISD::TRUNCATE, DL,
23841  MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
23842 
23843  auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
23844  Mask, Op1, Op2);
23845 
23846  return convertFromScalableVector(DAG, VT, ScalableRes);
23847 }
23848 
23849 SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
23850  SDValue Op, SelectionDAG &DAG) const {
23851  SDLoc DL(Op);
23852  EVT InVT = Op.getOperand(0).getValueType();
23853  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
23854 
23855  assert(InVT.isFixedLengthVector() && isTypeLegal(InVT) &&
23856  "Only expected to lower fixed length vector operation!");
23857  assert(Op.getValueType() == InVT.changeTypeToInteger() &&
23858  "Expected integer result of the same bit length as the inputs!");
23859 
23860  auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
23861  auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
23862  auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
23863 
23864  EVT CmpVT = Pg.getValueType();
23865  auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
23866  {Pg, Op1, Op2, Op.getOperand(2)});
23867 
23868  EVT PromoteVT = ContainerVT.changeTypeToInteger();
23869  auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
23870  return convertFromScalableVector(DAG, Op.getValueType(), Promote);
23871 }
23872 
23873 SDValue
23874 AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
23875  SelectionDAG &DAG) const {
23876  SDLoc DL(Op);
23877  auto SrcOp = Op.getOperand(0);
23878  EVT VT = Op.getValueType();
23879  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
23880  EVT ContainerSrcVT =
23881  getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
23882 
23883  SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
23884  Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
23885  return convertFromScalableVector(DAG, VT, Op);
23886 }
23887 
23888 SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
23889  SDValue Op, SelectionDAG &DAG) const {
23890  SDLoc DL(Op);
23891  unsigned NumOperands = Op->getNumOperands();
23892 
23893  assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
23894  "Unexpected number of operands in CONCAT_VECTORS");
23895 
23896  auto SrcOp1 = Op.getOperand(0);
23897  auto SrcOp2 = Op.getOperand(1);
23898  EVT VT = Op.getValueType();
23899  EVT SrcVT = SrcOp1.getValueType();
23900 
23901  if (NumOperands > 2) {
23903  EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
23904  for (unsigned I = 0; I < NumOperands; I += 2)
23905  Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
23906  Op->getOperand(I), Op->getOperand(I + 1)));
23907 
23908  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
23909  }
23910 
23911  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
23912 
23913  SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
23914  SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
23915  SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
23916 
23917  Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
23918 
23919  return convertFromScalableVector(DAG, VT, Op);
23920 }
23921 
// Lower a fixed-length FP_EXTEND. The narrow source is first widened as raw
// integer data (so each source element occupies a destination-sized lane),
// then reinterpreted as the unpacked FP type and extended in place with the
// merging FCVT node. The statement order here is significant: the integer
// any-extend must happen on the fixed-length type before conversion to a
// scalable container.
SDValue
AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

  SDLoc DL(Op);
  SDValue Val = Op.getOperand(0);
  SDValue Pg = getPredicateForVector(DAG, DL, VT);
  EVT SrcVT = Val.getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
  // ExtendVT: the destination container shape but with the (narrower) source
  // element type, i.e. the unpacked FP input to the extend.
  EVT ExtendVT = ContainerVT.changeVectorElementType(
      SrcVT.getVectorElementType());

  // Widen each element slot to the destination width using integer ops; the
  // high bits are don't-care (ANY_EXTEND) since FCVT only reads the low part.
  Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
  Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);

  Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
  Val = getSVESafeBitCast(ExtendVT, Val, DAG);
  // Merging FP extend; inactive lanes are undefined so UNDEF passthru is fine.
  Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
                    Pg, Val, DAG.getUNDEF(ContainerVT));

  return convertFromScalableVector(DAG, VT, Val);
}
23946 
// Lower a fixed-length FP_ROUND. The rounding happens on the source-shaped
// container (producing an unpacked result with narrow elements in wide
// lanes), after which the value is reinterpreted as integers and truncated
// down to the fixed-length destination width. The bitcast/truncate ordering
// is load-bearing; do not reorder.
SDValue
AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

  SDLoc DL(Op);
  SDValue Val = Op.getOperand(0);
  EVT SrcVT = Val.getValueType();
  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
  // RoundVT: source container shape with the narrower destination element
  // type, i.e. the unpacked FP result of the round.
  EVT RoundVT = ContainerSrcVT.changeVectorElementType(
      VT.getVectorElementType());
  SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);

  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
  // Merging FP round; operand 1 of the original node is the TRUNC flag.
  Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
                    Op.getOperand(1), DAG.getUNDEF(RoundVT));
  // Reinterpret as integers so the narrow elements can be compacted with a
  // plain integer TRUNCATE on the fixed-length type.
  Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
  Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);

  Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
  return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
23970 
23971 SDValue
23972 AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
23973  SelectionDAG &DAG) const {
23974  EVT VT = Op.getValueType();
23975  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
23976 
23977  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
23978  unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
23980 
23981  SDLoc DL(Op);
23982  SDValue Val = Op.getOperand(0);
23983  EVT SrcVT = Val.getValueType();
23984  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
23985  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
23986 
23987  if (VT.bitsGE(SrcVT)) {
23989 
23990  Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
23991  VT.changeTypeToInteger(), Val);
23992 
23993  // Safe to use a larger than specified operand because by promoting the
23994  // value nothing has changed from an arithmetic point of view.
23995  Val =
23996  convertToScalableVector(DAG, ContainerDstVT.changeTypeToInteger(), Val);
23997  Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
23998  DAG.getUNDEF(ContainerDstVT));
23999  return convertFromScalableVector(DAG, VT, Val);
24000  } else {
24001  EVT CvtVT = ContainerSrcVT.changeVectorElementType(
24002  ContainerDstVT.getVectorElementType());
24003  SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
24004 
24005  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
24006  Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
24007  Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
24008  Val = convertFromScalableVector(DAG, SrcVT, Val);
24009 
24010  Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
24011  return DAG.getNode(ISD::BITCAST, DL, VT, Val);
24012  }
24013 }
24014 
24015 SDValue
24016 AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
24017  SelectionDAG &DAG) const {
24018  EVT VT = Op.getValueType();
24019  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
24020 
24021  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
24022  unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
24024 
24025  SDLoc DL(Op);
24026  SDValue Val = Op.getOperand(0);
24027  EVT SrcVT = Val.getValueType();
24028  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
24029  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
24030 
24031  if (VT.bitsGT(SrcVT)) {
24032  EVT CvtVT = ContainerDstVT.changeVectorElementType(
24033  ContainerSrcVT.getVectorElementType());
24035 
24036  Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
24037  Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
24038 
24039  Val = convertToScalableVector(DAG, ContainerDstVT, Val);
24040  Val = getSVESafeBitCast(CvtVT, Val, DAG);
24041  Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
24042  DAG.getUNDEF(ContainerDstVT));
24043  return convertFromScalableVector(DAG, VT, Val);
24044  } else {
24045  EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
24046  SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
24047 
24048  // Safe to use a larger than specified result since an fp_to_int where the
24049  // result doesn't fit into the destination is undefined.
24050  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
24051  Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
24052  Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
24053 
24054  return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
24055  }
24056 }
24057 
24058 SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
24059  SDValue Op, SelectionDAG &DAG) const {
24060  EVT VT = Op.getValueType();
24061  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
24062 
24063  auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
24064  auto ShuffleMask = SVN->getMask();
24065 
24066  SDLoc DL(Op);
24067  SDValue Op1 = Op.getOperand(0);
24068  SDValue Op2 = Op.getOperand(1);
24069 
24070  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
24071  Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
24072  Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
24073 
24074  auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT {
24075  if (ScalarTy == MVT::i8 || ScalarTy == MVT::i16)
24076  return MVT::i32;
24077  return ScalarTy;
24078  };
24079 
24080  if (SVN->isSplat()) {
24081  unsigned Lane = std::max(0, SVN->getSplatIndex());
24082  EVT ScalarTy = MinLegalExtractEltScalarTy(VT.getVectorElementType());
24083  SDValue SplatEl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
24084  DAG.getConstant(Lane, DL, MVT::i64));
24085  Op = DAG.getNode(ISD::SPLAT_VECTOR, DL, ContainerVT, SplatEl);
24086  return convertFromScalableVector(DAG, VT, Op);
24087  }
24088 
24089  bool ReverseEXT = false;
24090  unsigned Imm;
24091  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
24092  Imm == VT.getVectorNumElements() - 1) {
24093  if (ReverseEXT)
24094  std::swap(Op1, Op2);
24095  EVT ScalarTy = MinLegalExtractEltScalarTy(VT.getVectorElementType());
24096  SDValue Scalar = DAG.getNode(
24097  ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
24098  DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
24099  Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
24100  return convertFromScalableVector(DAG, VT, Op);
24101  }
24102 
24103  for (unsigned LaneSize : {64U, 32U, 16U}) {
24104  if (isREVMask(ShuffleMask, VT, LaneSize)) {
24105  EVT NewVT =
24107  unsigned RevOp;
24108  unsigned EltSz = VT.getScalarSizeInBits();
24109  if (EltSz == 8)
24111  else if (EltSz == 16)
24113  else
24115 
24116  Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
24117  Op = LowerToPredicatedOp(Op, DAG, RevOp);
24118  Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
24119  return convertFromScalableVector(DAG, VT, Op);
24120  }
24121  }
24122 
24123  if (Subtarget->hasSVE2p1() && VT.getScalarSizeInBits() == 64 &&
24124  isREVMask(ShuffleMask, VT, 128)) {
24125  if (!VT.isFloatingPoint())
24126  return LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
24127 
24129  Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
24130  Op = LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
24131  Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
24132  return convertFromScalableVector(DAG, VT, Op);
24133  }
24134 
24135  unsigned WhichResult;
24136  if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
24138  DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op2));
24139 
24140  if (isTRNMask(ShuffleMask, VT, WhichResult)) {
24141  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
24143  DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
24144  }
24145 
24146  if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
24148  DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op1));
24149 
24150  if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
24151  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
24153  DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
24154  }
24155 
24156  // Functions like isZIPMask return true when a ISD::VECTOR_SHUFFLE's mask
24157  // represents the same logical operation as performed by a ZIP instruction. In
24158  // isolation these functions do not mean the ISD::VECTOR_SHUFFLE is exactly
24159  // equivalent to an AArch64 instruction. There's the extra component of
24160  // ISD::VECTOR_SHUFFLE's value type to consider. Prior to SVE these functions
24161  // only operated on 64/128bit vector types that have a direct mapping to a
24162  // target register and so an exact mapping is implied.
24163  // However, when using SVE for fixed length vectors, most legal vector types
24164  // are actually sub-vectors of a larger SVE register. When mapping
24165  // ISD::VECTOR_SHUFFLE to an SVE instruction care must be taken to consider
24166  // how the mask's indices translate. Specifically, when the mapping requires
24167  // an exact meaning for a specific vector index (e.g. Index X is the last
24168  // vector element in the register) then such mappings are often only safe when
24169  // the exact SVE register size is know. The main exception to this is when
24170  // indices are logically relative to the first element of either
24171  // ISD::VECTOR_SHUFFLE operand because these relative indices don't change
24172  // when converting from fixed-length to scalable vector types (i.e. the start
24173  // of a fixed length vector is always the start of a scalable vector).
24174  unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
24175  unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
24176  if (MinSVESize == MaxSVESize && MaxSVESize == VT.getSizeInBits()) {
24177  if (ShuffleVectorInst::isReverseMask(ShuffleMask) && Op2.isUndef()) {
24178  Op = DAG.getNode(ISD::VECTOR_REVERSE, DL, ContainerVT, Op1);
24179  return convertFromScalableVector(DAG, VT, Op);
24180  }
24181 
24182  if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
24184  DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op2));
24185 
24186  if (isUZPMask(ShuffleMask, VT, WhichResult)) {
24187  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
24189  DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
24190  }
24191 
24192  if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
24194  DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op1));
24195 
24196  if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
24197  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
24199  DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
24200  }
24201  }
24202 
24203  return SDValue();
24204 }
24205 
24206 SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
24207  SelectionDAG &DAG) const {
24208  SDLoc DL(Op);
24209  EVT InVT = Op.getValueType();
24210 
24211  assert(VT.isScalableVector() && isTypeLegal(VT) &&
24212  InVT.isScalableVector() && isTypeLegal(InVT) &&
24213  "Only expect to cast between legal scalable vector types!");
24215  InVT.getVectorElementType() != MVT::i1 &&
24216  "For predicate bitcasts, use getSVEPredicateBitCast");
24217 
24218  if (InVT == VT)
24219  return Op;
24220 
24222  EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
24223 
24224  // Safe bitcasting between unpacked vector types of different element counts
24225  // is currently unsupported because the following is missing the necessary
24226  // work to ensure the result's elements live where they're supposed to within
24227  // an SVE register.
24228  // 01234567
24229  // e.g. nxv2i32 = XX??XX??
24230  // nxv4f16 = X?X?X?X?
24232  VT == PackedVT || InVT == PackedInVT) &&
24233  "Unexpected bitcast!");
24234 
24235  // Pack input if required.
24236  if (InVT != PackedInVT)
24237  Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
24238 
24239  Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
24240 
24241  // Unpack result if required.
24242  if (VT != PackedVT)
24244 
24245  return Op;
24246 }
24247 
24249  SDValue N) const {
24251 }
24252 
24255 }
24256 
24257 bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
24258  SDValue Op, const APInt &OriginalDemandedBits,
24259  const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
24260  unsigned Depth) const {
24261 
24262  unsigned Opc = Op.getOpcode();
24263  switch (Opc) {
24264  case AArch64ISD::VSHL: {
24265  // Match (VSHL (VLSHR Val X) X)
24266  SDValue ShiftL = Op;
24267  SDValue ShiftR = Op->getOperand(0);
24268  if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
24269  return false;
24270 
24271  if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
24272  return false;
24273 
24274  unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
24275  unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
24276 
24277  // Other cases can be handled as well, but this is not
24278  // implemented.
24279  if (ShiftRBits != ShiftLBits)
24280  return false;
24281 
24282  unsigned ScalarSize = Op.getScalarValueSizeInBits();
24283  assert(ScalarSize > ShiftLBits && "Invalid shift imm");
24284 
24285  APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
24286  APInt UnusedBits = ~OriginalDemandedBits;
24287 
24288  if ((ZeroBits & UnusedBits) != ZeroBits)
24289  return false;
24290 
24291  // All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
24292  // used - simplify to just Val.
24293  return TLO.CombineTo(Op, ShiftR->getOperand(0));
24294  }
24295  case ISD::INTRINSIC_WO_CHAIN: {
24296  if (auto ElementSize = IsSVECntIntrinsic(Op)) {
24297  unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
24298  if (!MaxSVEVectorSizeInBits)
24299  MaxSVEVectorSizeInBits = AArch64::SVEMaxBitsPerVector;
24300  unsigned MaxElements = MaxSVEVectorSizeInBits / *ElementSize;
24301  // The SVE count intrinsics don't support the multiplier immediate so we
24302  // don't have to account for that here. The value returned may be slightly
24303  // over the true required bits, as this is based on the "ALL" pattern. The
24304  // other patterns are also exposed by these intrinsics, but they all
24305  // return a value that's strictly less than "ALL".
24306  unsigned RequiredBits = llvm::bit_width(MaxElements);
24307  unsigned BitWidth = Known.Zero.getBitWidth();
24308  if (RequiredBits < BitWidth)
24309  Known.Zero.setHighBits(BitWidth - RequiredBits);
24310  return false;
24311  }
24312  }
24313  }
24314 
24316  Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
24317 }
24318 
24319 bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
24320  return Op.getOpcode() == AArch64ISD::DUP ||
24321  Op.getOpcode() == AArch64ISD::MOVI ||
24322  (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24323  Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
24325 }
24326 
24327 bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
24328  unsigned Opc, LLT Ty1, LLT Ty2) const {
24329  return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
24330 }
24331 
24333  return Subtarget->hasComplxNum();
24334 }
24335 
24338  auto *VTy = dyn_cast<FixedVectorType>(Ty);
24339  if (!VTy)
24340  return false;
24341 
24342  auto *ScalarTy = VTy->getScalarType();
24343  unsigned NumElements = VTy->getNumElements();
24344 
24345  unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
24346  if ((VTyWidth < 128 && VTyWidth != 64) || !llvm::isPowerOf2_32(VTyWidth))
24347  return false;
24348 
24349  return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
24350  ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
24351 }
24352 
24355  ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
24356  Value *Accumulator) const {
24357  FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
24358 
24359  IRBuilder<> B(I);
24360 
24361  unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
24362 
24363  assert(((TyWidth >= 128 && llvm::isPowerOf2_32(TyWidth)) || TyWidth == 64) &&
24364  "Vector type must be either 64 or a power of 2 that is at least 128");
24365 
24366  if (TyWidth > 128) {
24367  int Stride = Ty->getNumElements() / 2;
24368  auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
24369  auto SplitSeqVec = llvm::to_vector(SplitSeq);
24370  ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
24371  ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
24372 
24373  auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
24374  auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
24375  auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
24376  auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
24377  Value *LowerSplitAcc = nullptr;
24378  Value *UpperSplitAcc = nullptr;
24379 
24380  if (Accumulator) {
24381  LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
24382  UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
24383  }
24384 
24385  auto *LowerSplitInt = createComplexDeinterleavingIR(
24386  I, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
24387  auto *UpperSplitInt = createComplexDeinterleavingIR(
24388  I, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
24389 
24390  ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
24391  return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
24392  }
24393 
24394  if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
24395  Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0,
24396  Intrinsic::aarch64_neon_vcmla_rot90,
24397  Intrinsic::aarch64_neon_vcmla_rot180,
24398  Intrinsic::aarch64_neon_vcmla_rot270};
24399 
24400  if (Accumulator == nullptr)
24401  Accumulator = ConstantFP::get(Ty, 0);
24402 
24403  return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,
24404  {Accumulator, InputB, InputA});
24405  }
24406 
24407  if (OperationType == ComplexDeinterleavingOperation::CAdd) {
24410  IntId = Intrinsic::aarch64_neon_vcadd_rot90;
24411  else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
24412  IntId = Intrinsic::aarch64_neon_vcadd_rot270;
24413 
24414  if (IntId == Intrinsic::not_intrinsic)
24415  return nullptr;
24416 
24417  return B.CreateIntrinsic(IntId, Ty, {InputA, InputB});
24418  }
24419 
24420  return nullptr;
24421 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
EnableOptimizeLogicalImm
static cl::opt< bool > EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, cl::desc("Enable AArch64 logical imm instruction " "optimization"), cl::init(true))
llvm::TargetLoweringBase::getPreferredVectorAction
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
Definition: TargetLowering.h:479
llvm::AArch64ISD::LD1x2post
@ LD1x2post
Definition: AArch64ISelLowering.h:454
llvm::AArch64ISD::MOPS_MEMSET_TAGGING
@ MOPS_MEMSET_TAGGING
Definition: AArch64ISelLowering.h:486
llvm::ISD::FPOWI
@ FPOWI
Definition: ISDOpcodes.h:916
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:130
llvm::AArch64ISD::EORV_PRED
@ EORV_PRED
Definition: AArch64ISelLowering.h:276
llvm::AArch64ISD::NodeType
NodeType
Definition: AArch64ISelLowering.h:50
OP_VTRNL
@ OP_VTRNL
Definition: ARMISelLowering.cpp:8324
llvm::SDNode::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Definition: SelectionDAGNodes.h:1627
OP_VDUP0
@ OP_VDUP0
Definition: ARMISelLowering.cpp:8313
llvm::SelectionDAG::getMemcpy
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
Definition: SelectionDAG.cpp:7458
llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition: ISDOpcodes.h:928
llvm::TargetLowering::SimplifyDemandedBitsForTargetNode
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
Definition: TargetLowering.cpp:3545
i
i
Definition: README.txt:29
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::MVT::v1i16
@ v1i16
Definition: MachineValueType.h:97
llvm::MVT::nxv4i1
@ nxv4i1
Definition: MachineValueType.h:208
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::ISD::STRICT_FP_ROUND
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:464
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1447
llvm::AArch64ISD::LD2DUPpost
@ LD2DUPpost
Definition: AArch64ISelLowering.h:461
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
tryExtendDUPToExtractHigh
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17046
foldCSELOfCSEL
static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20273
llvm::TargetLoweringBase::ShiftLegalizationStrategy::LowerToLibcall
@ LowerToLibcall
llvm::TargetLoweringBase::MaxStoresPerMemsetOptSize
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3438
llvm::AArch64II::MO_G3
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
Definition: AArch64BaseInfo.h:715
llvm::AArch64ISD::SVE_LD4_MERGE_ZERO
@ SVE_LD4_MERGE_ZERO
Definition: AArch64ISelLowering.h:370
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:182
llvm::isAsynchronousEHPersonality
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
Definition: EHPersonalities.h:49
llvm::AArch64ISD::UMULL
@ UMULL
Definition: AArch64ISelLowering.h:308
llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:36
ValueTypes.h
llvm::AArch64ISD::LOADgot
@ LOADgot
Definition: AArch64ISelLowering.h:78
llvm::TargetLoweringBase::getPointerMemTy
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
Definition: TargetLowering.h:372
llvm::AArch64_AM::encodeAdvSIMDModImmType3
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
Definition: AArch64AddressingModes.h:487
llvm::AArch64TargetLowering::isOpSuitableForLSE128
bool isOpSuitableForLSE128(const Instruction *I) const
Definition: AArch64ISelLowering.cpp:22606
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::SMEAttrs::hasZAState
bool hasZAState() const
Definition: AArch64SMEAttributes.h:79
llvm::AArch64ISD::LD3post
@ LD3post
Definition: AArch64ISelLowering.h:449
llvm::AArch64CC::Invalid
@ Invalid
Definition: AArch64BaseInfo.h:272
llvm::ShuffleVectorSDNode::getSplatIndex
int getSplatIndex() const
Definition: SelectionDAGNodes.h:1552
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1586
llvm::AArch64Subtarget::isTargetWindows
bool isTargetWindows() const
Definition: AArch64Subtarget.h:264
llvm::AArch64ISD::UUNPKHI
@ UUNPKHI
Definition: AArch64ISelLowering.h:320
llvm::ISD::SETCCCARRY
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:744
llvm::AArch64ISD::LDIAPP
@ LDIAPP
Definition: AArch64ISelLowering.h:478
llvm::AArch64FunctionInfo::setArgumentStackToRestore
void setArgumentStackToRestore(unsigned bytes)
Definition: AArch64MachineFunctionInfo.h:215
llvm::MaskedLoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Definition: SelectionDAGNodes.h:2670
FALKOR_STRIDED_ACCESS_MD
#define FALKOR_STRIDED_ACCESS_MD
Definition: AArch64InstrInfo.h:33
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2398
AArch64RegisterInfo.h
llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:475
areOperandsOfVmullHighP64
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2)
Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
Definition: AArch64ISelLowering.cpp:13862
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::Type::isSized
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:295
tryAdvSIMDModImm16
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
Definition: AArch64ISelLowering.cpp:11844
llvm::MVT::nxv2i1
@ nxv2i1
Definition: MachineValueType.h:207
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2309
llvm::AArch64CC::LO
@ LO
Definition: AArch64BaseInfo.h:258
OP_VZIPL
@ OP_VZIPL
Definition: ARMISelLowering.cpp:8322
llvm::ISD::VECTOR_SHUFFLE
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:586
llvm::AArch64ISD::VLSHR
@ VLSHR
Definition: AArch64ISelLowering.h:211
llvm::SDValue::dump
void dump() const
Definition: SelectionDAGNodes.h:1193
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:548
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4884
llvm::SelectionDAG::getCALLSEQ_START
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:1022
llvm::AArch64ISD::SSTNT1_INDEX_PRED
@ SSTNT1_INDEX_PRED
Definition: AArch64ISelLowering.h:427
llvm::ISD::isSignedIntSetCC
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1467
llvm::ScalableVectorType
Class to represent scalable SIMD vectors.
Definition: DerivedTypes.h:572
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1458
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1154
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::KnownBits::lshr
static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for lshr(LHS, RHS).
Definition: KnownBits.cpp:221
llvm::AArch64ISD::ST1x4post
@ ST1x4post
Definition: AArch64ISelLowering.h:459
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1443
llvm::ISD::isOverflowIntrOpRes
bool isOverflowIntrOpRes(SDValue Op)
Returns true if the specified value is the overflow result from one of the overflow intrinsic nodes.
Definition: SelectionDAGNodes.h:3117
llvm::MVT::v4f16
@ v4f16
Definition: MachineValueType.h:149
llvm::AArch64ISD::UMINV
@ UMINV
Definition: AArch64ISelLowering.h:265
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1383
GPRArgRegs
static const MCPhysReg GPRArgRegs[]
Definition: AArch64ISelLowering.cpp:144
llvm::AArch64Subtarget::supportsAddressTopByteIgnored
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it.
Definition: AArch64Subtarget.cpp:446
llvm::AArch64_AM::isAdvSIMDModImmType6
static bool isAdvSIMDModImmType6(uint64_t Imm)
Definition: AArch64AddressingModes.h:528
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
llvm::AArch64CC::HI
@ HI
Definition: AArch64BaseInfo.h:263
MachineInstr.h
llvm::MaskedGatherSDNode
This class is used to represent an MGATHER node.
Definition: SelectionDAGNodes.h:2845
llvm::ISD::UMULO
@ UMULO
Definition: ISDOpcodes.h:332
MathExtras.h
llvm::EVT::getScalarStoreSize
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:369
llvm::APInt::sadd_ov
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1926
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
isExtendedBUILD_VECTOR
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
Definition: AArch64ISelLowering.cpp:4408
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm::AArch64TargetLowering::getSDagStackGuard
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: AArch64ISelLowering.cpp:22948
llvm::AArch64ISD::CLASTA_N
@ CLASTA_N
Definition: AArch64ISelLowering.h:323
llvm::bit_width
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:281
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::AArch64TargetLowering::useLoadStackGuardNode
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
Definition: AArch64ISelLowering.cpp:22566
llvm::objcarc::hasAttachedCallOpBundle
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition: ObjCARCUtil.h:29
splitStoreSplat
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts)
Definition: AArch64ISelLowering.cpp:18577
llvm::TargetOptions::GuaranteedTailCallOpt
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
Definition: TargetOptions.h:221
AArch64MachineFunctionInfo.h
llvm::AArch64TargetLowering::generateFMAsInMachineCombiner
bool generateFMAsInMachineCombiner(EVT VT, CodeGenOpt::Level OptLevel) const override
Definition: AArch64ISelLowering.cpp:14952
llvm::ISD::JumpTable
@ JumpTable
Definition: ISDOpcodes.h:81
llvm::AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore
bool shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert a trailing fence without reducing the ordering f...
Definition: AArch64ISelLowering.cpp:22656
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:238
llvm::AArch64ISD::ADDP
@ ADDP
Definition: AArch64ISelLowering.h:253
llvm::CallingConv::Win64
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:156
BlockSize
static const int BlockSize
Definition: TarWriter.cpp:33
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
llvm::AArch64FunctionInfo::getVarArgsFPRIndex
int getVarArgsFPRIndex() const
Definition: AArch64MachineFunctionInfo.h:351
tryAdvSIMDModImm8
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition: AArch64ISelLowering.cpp:11919
llvm::MVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: MachineValueType.h:1140
llvm::AArch64CC::AL
@ AL
Definition: AArch64BaseInfo.h:269
llvm::ElementCount::getScalable
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:294
llvm::ISD::PATCHPOINT
@ PATCHPOINT
Definition: ISDOpcodes.h:1303
llvm::ISD::VECTOR_REVERSE
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:577
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:986
llvm::SelectionDAG::getSelectionDAGInfo
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition: SelectionDAG.h:476
llvm::TargetLoweringBase::shouldConvertFpToSat
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
Definition: TargetLowering.h:3139
llvm::AArch64TargetLowering::getSSPStackGuardCheck
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
Definition: AArch64ISelLowering.cpp:22955
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
llvm::MVT::nxv2f64
@ nxv2f64
Definition: MachineValueType.h:267
getNegatedInteger
static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17372
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:386
llvm::ISD::STRICT_FSIN
@ STRICT_FSIN
Definition: ISDOpcodes.h:414
performInsertVectorEltCombine
static SDValue performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: AArch64ISelLowering.cpp:21426
llvm::AArch64ISD::FMINV_PRED
@ FMINV_PRED
Definition: AArch64ISelLowering.h:334
getAArch64XALUOOp
static std::pair< SDValue, SDValue > getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3585
llvm::AArch64ISD::SITOF
@ SITOF
Definition: AArch64ISelLowering.h:295
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
llvm::RecurKind::Or
@ Or
Bitwise or logical OR of integers.
llvm::MaskedValueIsZero
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if 'V & Mask' is known to be zero.
Definition: ValueTracking.cpp:364
isLanes1toNKnownZero
static bool isLanes1toNKnownZero(SDValue Op)
Definition: AArch64ISelLowering.cpp:21366
llvm::AArch64_AM::isAdvSIMDModImmType12
static bool isAdvSIMDModImmType12(uint64_t Imm)
Definition: AArch64AddressingModes.h:706
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1514
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1106
OP_VREV
@ OP_VREV
Definition: ARMISelLowering.cpp:8312
llvm::MVT::isFixedLengthVector
bool isFixedLengthVector() const
Definition: MachineValueType.h:404
llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:34
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:197
llvm::StructType::get
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:408
llvm::ARM::PredBlockMask::TT
@ TT
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:833
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
performAddDotCombine
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17346
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: MachineValueType.h:376
llvm::MVT::v1f64
@ v1f64
Definition: MachineValueType.h:189
combineSVEReductionOrderedFP
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18100
llvm::TargetMachine::useEmulatedTLS
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Definition: TargetMachine.cpp:146
llvm::AArch64II::MO_HI12
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
Definition: AArch64BaseInfo.h:732
AArch64SetCCInfo
Helper structure to keep track of a SET_CC lowered into AArch64 code.
Definition: AArch64ISelLowering.cpp:17106
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:213
Upa
@ Upa
Definition: AArch64ISelLowering.cpp:9957
llvm::AArch64::getSMEPseudoMap
int getSMEPseudoMap(uint16_t Opcode)
llvm::AArch64TargetLowering::targetShrinkDemandedConstant
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
Definition: AArch64ISelLowering.cpp:2099
performSETCCCombine
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20393
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::ISD::SETGT
@ SETGT
Definition: ISDOpcodes.h:1455
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:885
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
llvm::SelectionDAG::SignBitIsZero
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
Definition: SelectionDAG.cpp:2551
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1459
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:157
llvm::KnownBits::trunc
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:152
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
isCheapToExtend
static bool isCheapToExtend(const SDValue &N)
Definition: AArch64ISelLowering.cpp:18504
llvm::TargetLowering::getSingleConstraintMatchWeight
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Definition: TargetLowering.cpp:5604
getExtFactor
static unsigned getExtFactor(SDValue &V)
getExtFactor - Determine the adjustment factor for the position when generating an "extract from vect...
Definition: AArch64ISelLowering.cpp:10315
llvm::MachineFrameInfo::setReturnAddressIsTaken
void setReturnAddressIsTaken(bool s)
Definition: MachineFrameInfo.h:378
llvm::TargetLowering::ConstraintType
ConstraintType
Definition: TargetLowering.h:4620
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:990
llvm::TargetLoweringBase::getTypeToTransformTo
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:1005
llvm::EVT::getVectorElementCount
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:322
calculatePreExtendType
static EVT calculatePreExtendType(SDValue Extend)
Calculates what the pre-extend type is, based on the extension operation node provided by Extend.
Definition: AArch64ISelLowering.cpp:15502
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::AArch64ISD::FCMGEz
@ FCMGEz
Definition: AArch64ISelLowering.h:242
trySwapVSelectOperands
static SDValue trySwapVSelectOperands(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20675
llvm::SelectionDAG::addNoMergeSiteInfo
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
Definition: SelectionDAG.h:2274
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
llvm::ConstantSDNode::getAPIntValue
const APInt & getAPIntValue() const
Definition: SelectionDAGNodes.h:1600
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:249
isAllConstantBuildVector
static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal)
Definition: AArch64ISelLowering.cpp:11973
llvm::AArch64TargetLowering::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
Definition: AArch64ISelLowering.cpp:2310
llvm::AArch64CC::getNZCVToSatisfyCondCode
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
Definition: AArch64BaseInfo.h:313
llvm::AArch64II::MO_G1
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
Definition: AArch64BaseInfo.h:723
llvm::ShuffleVectorInst::getShuffleMask
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Definition: Instructions.cpp:2225
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1414
llvm::TargetOptions
Definition: TargetOptions.h:124
AtomicOrdering.h
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:168
llvm::ElementCount
Definition: TypeSize.h:279
llvm::AArch64ISD::SMAX_PRED
@ SMAX_PRED
Definition: AArch64ISelLowering.h:114
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:942
llvm::ISD::AssertSext
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:769
llvm::Function::empty
bool empty() const
Definition: Function.h:757
MaxXors
static cl::opt< unsigned > MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden, cl::desc("Maximum of xors"))
llvm::ISD::STRICT_FMAXNUM
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:423
llvm::EVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:348
llvm::AArch64CC::MI
@ MI
Definition: AArch64BaseInfo.h:259
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51
llvm::AArch64TargetLowering::useSVEForFixedLengthVectorVT
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON=false) const
Definition: AArch64ISelLowering.cpp:6152
llvm::HexagonISD::JT
@ JT
Definition: HexagonISelLowering.h:52
llvm::AArch64ISD::CCMN
@ CCMN
Definition: AArch64ISelLowering.h:162
llvm::AArch64ISD::DUP_MERGE_PASSTHRU
@ DUP_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:348
T
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:159
llvm::AArch64TargetLowering::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: AArch64ISelLowering.cpp:14853
llvm::AArch64TargetLowering::emitLoadLinked
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Definition: AArch64ISelLowering.cpp:22799
llvm::APInt::isMask
bool isMask(unsigned numBits) const
Definition: APInt.h:476
llvm::AArch64ISD::SMSTOP
@ SMSTOP
Definition: AArch64ISelLowering.h:69
isTRNMask
static bool isTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition: AArch64ISelLowering.cpp:10875
llvm::AArch64ISD::FCMLEz
@ FCMLEz
Definition: AArch64ISelLowering.h:244
llvm::AArch64ISD::CTPOP_MERGE_PASSTHRU
@ CTPOP_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:347
llvm::isOneConstant
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Definition: SelectionDAG.cpp:10932
performGlobalAddressCombine
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget, const TargetMachine &TM)
Definition: AArch64ISelLowering.cpp:20875
llvm::AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO
@ GLDFF1S_UXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:404
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::AArch64ISD::PREFETCH
@ PREFETCH
Definition: AArch64ISelLowering.h:292
llvm::Function
Definition: Function.h:59
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
performUADDVCombine
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15355
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:542
llvm::TargetLowering::TargetLoweringOpt::LegalOps
bool LegalOps
Definition: TargetLowering.h:3657
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
emitConjunction
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
Definition: AArch64ISelLowering.cpp:3405
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:700
llvm::ISD::UDIV
@ UDIV
Definition: ISDOpcodes.h:243
llvm::ISD::STRICT_UINT_TO_FP
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:449
llvm::AArch64ISD::UMAX_PRED
@ UMAX_PRED
Definition: AArch64ISelLowering.h:119
llvm::AArch64TargetLowering::needsFixedCatchObjects
bool needsFixedCatchObjects() const override
Used for exception handling on Win64.
Definition: AArch64ISelLowering.cpp:23123
llvm::ISD::STRICT_FMINNUM
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:424
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:324
llvm::MVT::i128
@ i128
Definition: MachineValueType.h:50
llvm::MVT::nxv2f32
@ nxv2f32
Definition: MachineValueType.h:261
llvm::AArch64ISD::TC_RETURN
@ TC_RETURN
Definition: AArch64ISelLowering.h:289
llvm::AArch64TargetLowering::EmitTileLoad
MachineBasicBlock * EmitTileLoad(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const
Definition: AArch64ISelLowering.cpp:2698
llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
llvm::AArch64ISD::CTLZ_MERGE_PASSTHRU
@ CTLZ_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:346
llvm::ISD::DYNAMIC_STACKALLOC
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:975
llvm::AArch64ISD::LD3LANEpost
@ LD3LANEpost
Definition: AArch64ISelLowering.h:466
performCONDCombine
static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex)
Definition: AArch64ISelLowering.cpp:20085
llvm::AArch64TargetLowering::getRoundingControlRegisters
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
Definition: AArch64ISelLowering.cpp:14969
llvm::AArch64ISD::SST1_UXTW_SCALED_PRED
@ SST1_UXTW_SCALED_PRED
Definition: AArch64ISelLowering.h:421
llvm::ISD::ATOMIC_LOAD_CLR
@ ATOMIC_LOAD_CLR
Definition: ISDOpcodes.h:1190
optimizeWhile
static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsLess, bool IsEqual)
Definition: AArch64ISelLowering.cpp:4706
llvm::AArch64Subtarget::getMinSVEVectorSizeInBits
unsigned getMinSVEVectorSizeInBits() const
Definition: AArch64Subtarget.h:375
convertFixedMaskToScalableVector
static SDValue convertFixedMaskToScalableVector(SDValue Mask, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:23352
llvm::AArch64CC::NONE_ACTIVE
@ NONE_ACTIVE
Definition: AArch64BaseInfo.h:278
llvm::SDNode::isUndef
bool isUndef() const
Return true if the type of the node type undefined.
Definition: SelectionDAGNodes.h:667
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1884
llvm::AArch64ISD::SBCS
@ SBCS
Definition: AArch64ISelLowering.h:157
llvm::AArch64_AM::getFP16Imm
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
Definition: AArch64AddressingModes.h:368
llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:432
llvm::BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
Definition: SelectionDAG.cpp:11923
llvm::AArch64ISD::TBNZ
@ TBNZ
Definition: AArch64ISelLowering.h:286
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
OP_COPY
@ OP_COPY
Definition: ARMISelLowering.cpp:8311
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::AArch64ISD::FMAXNMV_PRED
@ FMAXNMV_PRED
Definition: AArch64ISelLowering.h:333
llvm::AArch64ISD::SADDV
@ SADDV
Definition: AArch64ISelLowering.h:249
llvm::KnownBits::Zero
APInt Zero
Definition: KnownBits.h:24
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
GetElementPtrTypeIterator.h
contains
return AArch64::GPR64RegClass contains(Reg)
isSetCCOrZExtSetCC
static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info)
Definition: AArch64ISelLowering.cpp:17174
llvm::ComplexDeinterleavingRotation::Rotation_90
@ Rotation_90
llvm::isPackedVectorType
bool isPackedVectorType(EVT SomeVT)
Definition: VECustomDAG.cpp:22
llvm::AArch64ISD::ASSERT_ZEXT_BOOL
@ ASSERT_ZEXT_BOOL
Definition: AArch64ISelLowering.h:435
llvm::AArch64ISD::LD1x3post
@ LD1x3post
Definition: AArch64ISelLowering.h:455
llvm::Attribute::get
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:91
performLD1Combine
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc)
Definition: AArch64ISelLowering.cpp:18647
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:341
llvm::TargetLoweringBase::MaxStoresPerMemset
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
Definition: TargetLowering.h:3436
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition: CodeGen.h:46
llvm::ISD::MLOAD
@ MLOAD
Definition: ISDOpcodes.h:1211
llvm::AArch64ISD::UZP1
@ UZP1
Definition: AArch64ISelLowering.h:199
changeFPCCToANDAArch64CC
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
Definition: AArch64ISelLowering.cpp:2974
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:132
performSpliceCombine
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18969
llvm::AtomicRMWInst::getOperation
BinOp getOperation() const
Definition: Instructions.h:812
llvm::AArch64ISD::ZIP1
@ ZIP1
Definition: AArch64ISelLowering.h:197
llvm::MachinePointerInfo::getUnknownStack
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
Definition: MachineOperand.cpp:1067
llvm::SelectionDAG::getFrameIndex
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
Definition: SelectionDAG.cpp:1765
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::AArch64_AM::encodeAdvSIMDModImmType1
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
Definition: AArch64AddressingModes.h:457
Statistic.h
llvm::ISD::FP_TO_UINT_SAT
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:839
performConcatVectorsCombine
static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:16721
llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1454
llvm::AArch64ISD::MULHU_PRED
@ MULHU_PRED
Definition: AArch64ISelLowering.h:109
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:392
llvm::AArch64::SMEMatrixTypeMask
@ SMEMatrixTypeMask
Definition: AArch64InstrInfo.h:586
llvm::AArch64_AM::isAdvSIMDModImmType4
static bool isAdvSIMDModImmType4(uint64_t Imm)
Definition: AArch64AddressingModes.h:497
llvm::ISD::STACKRESTORE
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1056
llvm::MemSDNode::getMergedOrdering
AtomicOrdering getMergedOrdering() const
Return a single atomic ordering that is at least as strong as both the success and failure orderings ...
Definition: SelectionDAGNodes.h:1341
llvm::ISD::isVectorShrinkable
bool isVectorShrinkable(const SDNode *N, unsigned NewEltSize, bool Signed)
Returns true if the specified node is a vector where all elements can be truncated to the specified e...
Definition: SelectionDAG.cpp:296
isTRN_v_undef_Mask
static bool isTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of "vector_shuffle v,...
Definition: AArch64ISelLowering.cpp:10929
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9508
llvm::AArch64TargetLowering::isDesirableToCommuteWithShift
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
Definition: AArch64ISelLowering.cpp:14975
llvm::AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO
@ GLDFF1S_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:401
llvm::AArch64ISD::CMHS
@ CMHS
Definition: AArch64ISelLowering.h:230
llvm::TargetLoweringBase::setMaximumJumpTableSize
void setMaximumJumpTableSize(unsigned)
Indicate the maximum number of entries in jump tables.
Definition: TargetLoweringBase.cpp:2011
resolveBuildVector
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits)
Definition: AArch64ISelLowering.cpp:11748
llvm::MVT::v2i1
@ v2i1
Definition: MachineValueType.h:67
llvm::AArch64CC::FIRST_ACTIVE
@ FIRST_ACTIVE
Definition: AArch64BaseInfo.h:276
llvm::AArch64ISD::STNP
@ STNP
Definition: AArch64ISelLowering.h:482
llvm::AArch64ISD::BIC
@ BIC
Definition: AArch64ISelLowering.h:123
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1516
performVectorShiftCombine
static SDValue performVectorShiftCombine(SDNode *N, const AArch64TargetLowering &TLI, TargetLowering::DAGCombinerInfo &DCI)
Optimize a vector shift instruction and its operand if shifted out bits are not used.
Definition: AArch64ISelLowering.cpp:19208
llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegacyLegalizerInfo.h:54
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO
@ GLD1_SXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:378
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2901
llvm::AArch64ISD::ST3LANEpost
@ ST3LANEpost
Definition: AArch64ISelLowering.h:469
llvm::CallingConv::PreserveMost
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
ErrorHandling.h
llvm::AArch64ISD::UMIN_PRED
@ UMIN_PRED
Definition: AArch64ISelLowering.h:120
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:196
lookThroughSignExtension
std::pair< SDValue, uint64_t > lookThroughSignExtension(SDValue Val)
Definition: AArch64ISelLowering.cpp:8394
performFPExtendCombine
static SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:21455
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::ISD::MGATHER
@ MGATHER
Definition: ISDOpcodes.h:1223
llvm::IRBuilder<>
llvm::AArch64ISD::FCEIL_MERGE_PASSTHRU
@ FCEIL_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:130
SetCCInfo::AArch64
AArch64SetCCInfo AArch64
Definition: AArch64ISelLowering.cpp:17114
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1672
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1998
llvm::APInt::zextOrTrunc
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:994
llvm::ISD::STEP_VECTOR
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:632
llvm::AArch64ISD::LD2post
@ LD2post
Definition: AArch64ISelLowering.h:448
llvm::LLT::scalar
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:42
OP_VZIPR
@ OP_VZIPR
Definition: ARMISelLowering.cpp:8323
llvm::APInt::isOne
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:378
getIntrinsicID
static unsigned getIntrinsicID(const SDNode *N)
Definition: AArch64ISelLowering.cpp:6202
AArch64BaseInfo.h
llvm::AArch64ISD::MULHS_PRED
@ MULHS_PRED
Definition: AArch64ISelLowering.h:108
llvm::ISD::MemIndexType
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1396
llvm::AArch64ISD::FRECPX_MERGE_PASSTHRU
@ FRECPX_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:134
getScaledOffsetForBitWidth
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset, SDLoc DL, unsigned BitWidth)
Definition: AArch64ISelLowering.cpp:20939
performCTLZCombine
static SDValue performCTLZCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:20926
llvm::AArch64Subtarget::isTargetDarwin
bool isTargetDarwin() const
Definition: AArch64Subtarget.h:261
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1355
llvm::AArch64::getFPRArgRegs
const ArrayRef< MCPhysReg > getFPRArgRegs()
Definition: AArch64ISelLowering.cpp:153
ValueTracking.h
llvm::AArch64Subtarget::isTargetELF
bool isTargetELF() const
Definition: AArch64Subtarget.h:270
performBuildShuffleExtendCombine
static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG)
Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern into sext/zext(buildvecto...
Definition: AArch64ISelLowering.cpp:15540
llvm::AArch64ISD::STG
@ STG
Definition: AArch64ISelLowering.h:472
performMulVectorExtendCombine
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG)
Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup)) making use of the vector SExt/ZE...
Definition: AArch64ISelLowering.cpp:15604
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:919
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1378
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::AArch64ISD::CMGE
@ CMGE
Definition: AArch64ISelLowering.h:227
llvm::TargetLoweringBase::getSafeStackPointerLocation
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const
Returns the target-specific address of the unsafe stack pointer.
Definition: TargetLoweringBase.cpp:1897
performSetccAddFolding
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17187
llvm::AArch64TargetLowering::getAsmOperandValueType
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const override
Definition: AArch64ISelLowering.cpp:10137
llvm::ISD::USUBSAT
@ USUBSAT
Definition: ISDOpcodes.h:350
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:463
llvm::TargetLoweringBase::getLibcallName
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
Definition: TargetLowering.h:3177
tryConvertSVEWideCompare
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17962
llvm::MVT::nxv2i64
@ nxv2i64
Definition: MachineValueType.h:237
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
MachineBasicBlock.h
llvm::TargetLowering::CW_Constant
@ CW_Constant
Definition: TargetLowering.h:4642
llvm::TargetLowering::SimplifyDemandedBits
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
Definition: TargetLowering.cpp:1061
llvm::ConstantSDNode::isAllOnes
bool isAllOnes() const
Definition: SelectionDAGNodes.h:1613
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::AArch64ISD::FCMGTz
@ FCMGTz
Definition: AArch64ISelLowering.h:243
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:819
llvm::AArch64ISD::SMAXV_PRED
@ SMAXV_PRED
Definition: AArch64ISelLowering.h:271
llvm::AArch64ISD::CMLTz
@ CMLTz
Definition: AArch64ISelLowering.h:240
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3939
llvm::AArch64TargetLowering::getScratchRegisters
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
Definition: AArch64ISelLowering.cpp:14959
llvm::SelectionDAG::ReplaceAllUsesWith
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
Definition: SelectionDAG.cpp:10380
performBSPExpandForSVE
static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:21493
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:244
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2348
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:282
llvm::AArch64ISD::FMAX_PRED
@ FMAX_PRED
Definition: AArch64ISelLowering.h:99
llvm::AArch64CC::ANY_ACTIVE
@ ANY_ACTIVE
Definition: AArch64BaseInfo.h:275
llvm::MemOp
Definition: TargetLowering.h:112
llvm::AArch64ISD::DUP
@ DUP
Definition: AArch64ISelLowering.h:172
llvm::CCValAssign::Indirect
@ Indirect
Definition: CallingConvLower.h:50
llvm::SelectionDAG::getMemBasePlusOffset
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
Definition: SelectionDAG.cpp:6941
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:745
OP_VDUP3
@ OP_VDUP3
Definition: ARMISelLowering.cpp:8316
APInt.h
llvm::AArch64TargetLowering::getMaxSupportedInterleaveFactor
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
Definition: AArch64ISelLowering.h:638
performAddCSelIntoCSinc
static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG)
Perform the scalar expression combine in the form of: CSEL(c, 1, cc) + b => CSINC(b+c,...
Definition: AArch64ISelLowering.cpp:17273
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:236
llvm::AArch64ISD::FCVTZS_MERGE_PASSTHRU
@ FCVTZS_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:145
llvm::MaskedGatherScatterSDNode::getMask
const SDValue & getMask() const
Definition: SelectionDAGNodes.h:2834
isZeroExtended
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:4465
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::TargetLowering::C_Memory
@ C_Memory
Definition: TargetLowering.h:4623
llvm::AArch64ISD::LDFF1_MERGE_ZERO
@ LDFF1_MERGE_ZERO
Definition: AArch64ISelLowering.h:362
llvm::APInt::getSignedMaxValue
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
llvm::AArch64FunctionInfo::isSVECC
bool isSVECC() const
Definition: AArch64MachineFunctionInfo.h:203
Shift
bool Shift
Definition: README.txt:468
OP_VUZPL
@ OP_VUZPL
Definition: ARMISelLowering.cpp:8320
performPostLD1Combine
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R.
Definition: AArch64ISelLowering.cpp:19248
llvm::ISD::STRICT_FMAXIMUM
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:434
llvm::AArch64Subtarget::getInstrInfo
const AArch64InstrInfo * getInstrInfo() const override
Definition: AArch64Subtarget.h:181
llvm::AArch64ISD::ST1x2post
@ ST1x2post
Definition: AArch64ISelLowering.h:457
llvm::codeview::ExportFlags::IsData
@ IsData
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1449
llvm::AArch64TargetLowering::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition: AArch64ISelLowering.cpp:13642
llvm::EVT::isScalableVector
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:160
llvm::MVT::nxv4f16
@ nxv4f16
Definition: MachineValueType.h:248
performIntToFpCombine
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:15879
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::RTLIB::Libcall
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Definition: RuntimeLibcalls.h:30
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
llvm::AArch64ISD::MOVIshift
@ MOVIshift
Definition: AArch64ISelLowering.h:181
Module.h
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::AArch64ISD::LDP
@ LDP
Definition: AArch64ISelLowering.h:477
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:749
llvm::AttributeList
Definition: Attributes.h:432
llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:291
llvm::tgtok::Bits
@ Bits
Definition: TGLexer.h:50
ConstantBuildVector
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:12156
TargetInstrInfo.h
llvm::MemSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:1380
GenericSetCCInfo::Opnd0
const SDValue * Opnd0
Definition: AArch64ISelLowering.cpp:17100
llvm::AArch64TargetLowering::isComplexDeinterleavingSupported
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Definition: AArch64ISelLowering.cpp:24332
llvm::AArch64TargetLowering::isOpSuitableForLDPSTP
bool isOpSuitableForLDPSTP(const Instruction *I) const
Definition: AArch64ISelLowering.cpp:22591
performANDCombine
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: AArch64ISelLowering.cpp:16499
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:8168
replaceZeroVectorStore
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.
Definition: AArch64ISelLowering.cpp:18778
isSetCC
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one.
Definition: AArch64ISelLowering.cpp:17131
llvm::AArch64TargetLowering::ReconstructShuffle
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
Definition: AArch64ISelLowering.cpp:10334
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
getSignExtendedGatherOpcode
unsigned getSignExtendedGatherOpcode(unsigned Opcode)
Definition: AArch64ISelLowering.cpp:5372
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4814
getPerfectShuffleCost
static unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
Definition: AArch64PerfectShuffle.h:6589
llvm::AArch64ISD::MOPS_MEMSET
@ MOPS_MEMSET
Definition: AArch64ISelLowering.h:485
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:889
getEstimate
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps)
Definition: AArch64ISelLowering.cpp:9806
llvm::SmallSet< unsigned, 8 >
llvm::CC_AArch64_GHC
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::AArch64ISD::HADDU_PRED
@ HADDU_PRED
Definition: AArch64ISelLowering.h:106
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:736
isZeroingInactiveLanes
static bool isZeroingInactiveLanes(SDValue Op)
Definition: AArch64ISelLowering.cpp:264
llvm::ISD::STRICT_FPOW
@ STRICT_FPOW
Definition: ISDOpcodes.h:412
llvm::AArch64ISD::CALL
@ CALL
Definition: AArch64ISelLowering.h:53
llvm::MaskedStoreSDNode
This class is used to represent an MSTORE node.
Definition: SelectionDAGNodes.h:2687
llvm::AArch64ISD::GLDNT1_MERGE_ZERO
@ GLDNT1_MERGE_ZERO
Definition: AArch64ISelLowering.h:409
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::AArch64ISD::GLDFF1_UXTW_MERGE_ZERO
@ GLDFF1_UXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:393
llvm::SelectionDAG::getSplatBuildVector
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:841
llvm::EVT::getVectorVT
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
llvm::AArch64TargetLowering::shouldReduceLoadWidth
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override
Return true if it is profitable to reduce a load to a smaller type.
Definition: AArch64ISelLowering.cpp:13607
EnableAArch64ELFLocalDynamicTLSGeneration
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
FMAInstKind::Accumulator
@ Accumulator
llvm::SelectionDAG::getFPExtendOrRound
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
Definition: SelectionDAG.cpp:1413
llvm::CCValAssign::BCvt
@ BCvt
Definition: CallingConvLower.h:44
llvm::TargetLoweringBase::setMinFunctionAlignment
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
Definition: TargetLowering.h:2519
getContainerForFixedLengthVector
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
Definition: AArch64ISelLowering.cpp:23206
Vector
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::TargetLowering::LowerCallTo
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
Definition: SelectionDAGBuilder.cpp:9869
llvm::MVT::fp_fixedlen_vector_valuetypes
static auto fp_fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1560
llvm::SelectionDAG::getNodeIfExists
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
Definition: SelectionDAG.cpp:10092
llvm::ISD::VECREDUCE_FMAX
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1279
llvm::AArch64ISD::CALL_RVMARKER
@ CALL_RVMARKER
Definition: AArch64ISelLowering.h:57
llvm::AArch64_AM::isAdvSIMDModImmType3
static bool isAdvSIMDModImmType3(uint64_t Imm)
Definition: AArch64AddressingModes.h:482
performCSELCombine
static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20327
llvm::MVT::scalable_vector_valuetypes
static auto scalable_vector_valuetypes()
Definition: MachineValueType.h:1548
llvm::AArch64ISD::CBZ
@ CBZ
Definition: AArch64ISelLowering.h:283
llvm::AArch64Subtarget::getSecurityCheckCookieName
const char * getSecurityCheckCookieName() const
Definition: AArch64Subtarget.h:399
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1275
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::AArch64ISD::FFLOOR_MERGE_PASSTHRU
@ FFLOOR_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:131
llvm::AArch64ISD::ADC
@ ADC
Definition: AArch64ISelLowering.h:90
performAddCombineSubShift
static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17654
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::AArch64TargetLowering::shouldConsiderGEPOffsetSplit
bool shouldConsiderGEPOffsetSplit() const override
Definition: AArch64ISelLowering.cpp:14916
llvm::tgtok::FalseVal
@ FalseVal
Definition: TGLexer.h:62
getPTrue
static SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, int Pattern)
Definition: AArch64ISelLowering.cpp:4698
llvm::CCValAssign::AExtUpper
@ AExtUpper
Definition: CallingConvLower.h:42
llvm::ISD::isConstantSplatVectorAllZeros
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
Definition: SelectionDAG.cpp:220
llvm::AArch64TargetLowering::EmitLoweredCatchRet
MachineBasicBlock * EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const
Definition: AArch64ISelLowering.cpp:2689
llvm::CallingConv::WebKit_JS
@ WebKit_JS
Used for stack based JavaScript calls.
Definition: CallingConv.h:56
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:769
getVShiftImm
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
Definition: AArch64ISelLowering.cpp:12937
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::ConstantSDNode::isZero
bool isZero() const
Definition: SelectionDAGNodes.h:1610
llvm::MachineFrameInfo::setAdjustsStack
void setAdjustsStack(bool V)
Definition: MachineFrameInfo.h:610
llvm::AArch64_AM::isAdvSIMDModImmType7
static bool isAdvSIMDModImmType7(uint64_t Imm)
Definition: AArch64AddressingModes.h:544
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::AArch64TargetLowering::isLegalInterleavedAccessType
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
Definition: AArch64ISelLowering.cpp:14328
createTblShuffleForZExt
static void createTblShuffleForZExt(ZExtInst *ZExt, bool IsLittleEndian)
Definition: AArch64ISelLowering.cpp:14063
llvm::codeview::EncodedFramePtrReg::StackPtr
@ StackPtr
getCalleeAttrsFromExternalFunction
static std::optional< SMEAttrs > getCalleeAttrsFromExternalFunction(SDValue V)
Definition: AArch64ISelLowering.cpp:4810
performNegCSelCombine
static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17386
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:44
llvm::AArch64ISD::SMINV_PRED
@ SMINV_PRED
Definition: AArch64ISelLowering.h:273
performFpToIntCombine
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point multiply by power of two into floating-point to fixed-point conversion.
Definition: AArch64ISelLowering.cpp:15920
Select
amdgpu AMDGPU Register Bank Select
Definition: AMDGPURegBankSelect.cpp:45
llvm::AArch64ISD::MOVImsl
@ MOVImsl
Definition: AArch64ISelLowering.h:183
llvm::AArch64ISD::GLDFF1_MERGE_ZERO
@ GLDFF1_MERGE_ZERO
Definition: AArch64ISelLowering.h:391
STLExtras.h
llvm::AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO
@ GLDFF1S_UXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:402
llvm::ISD::VAEND
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1085
performXorCombine
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:15363
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1414
llvm::ISD::VECREDUCE_SEQ_FADD
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1263
llvm::AArch64ISD::FCMGT
@ FCMGT
Definition: AArch64ISelLowering.h:233
llvm::AArch64ISD::TRN2
@ TRN2
Definition: AArch64ISelLowering.h:202
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
optimizeLogicalImm
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, unsigned NewOpc)
Definition: AArch64ISelLowering.cpp:2004
llvm::AArch64ISD::GLDFF1_IMM_MERGE_ZERO
@ GLDFF1_IMM_MERGE_ZERO
Definition: AArch64ISelLowering.h:397
llvm::AArch64ISD::CCMP
@ CCMP
Definition: AArch64ISelLowering.h:161
valueToCarryFlag
static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert)
Definition: AArch64ISelLowering.cpp:3761
llvm::AArch64ISD::FMINNMV_PRED
@ FMINNMV_PRED
Definition: AArch64ISelLowering.h:335
llvm::SelectionDAG::getZExtOrTrunc
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition: SelectionDAG.cpp:1446
llvm::MachineFrameInfo::getObjectIndexEnd
int getObjectIndexEnd() const
Return one past the maximum frame object index.
Definition: MachineFrameInfo.h:410
llvm::AArch64ISD::URSHR_I
@ URSHR_I
Definition: AArch64ISelLowering.h:219
llvm::AArch64ISD::FRECPS
@ FRECPS
Definition: AArch64ISelLowering.h:314
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:264
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
llvm::ShuffleVectorInst::isReverseMask
static bool isReverseMask(ArrayRef< int > Mask)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
Definition: Instructions.cpp:2326
llvm::MaskedGatherScatterSDNode
This is a base class used to represent MGATHER and MSCATTER nodes.
Definition: SelectionDAGNodes.h:2807
llvm::AArch64ISD::GLD1S_IMM_MERGE_ZERO
@ GLD1S_IMM_MERGE_ZERO
Definition: AArch64ISelLowering.h:388
llvm::AArch64Subtarget::isWindowsArm64EC
bool isWindowsArm64EC() const
Definition: AArch64Subtarget.h:267
constructDup
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT, unsigned Opcode, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:11288
llvm::ISD::SETOEQ
@ SETOEQ
Definition: ISDOpcodes.h:1437
performMaskedGatherScatterCombine
static SDValue performMaskedGatherScatterCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:19681
performIntrinsicCombine
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:18147
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
GenericSetCCInfo::Opnd1
const SDValue * Opnd1
Definition: AArch64ISelLowering.cpp:17101
llvm::AArch64FunctionInfo::setVarArgsStackIndex
void setVarArgsStackIndex(int Index)
Definition: AArch64MachineFunctionInfo.h:340
llvm::SelectionDAG::isConstantIntBuildVectorOrConstantInt
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
Definition: SelectionDAG.cpp:12089
llvm::BlockAddressSDNode
Definition: SelectionDAGNodes.h:2199
llvm::AArch64ISD::REV32
@ REV32
Definition: AArch64ISelLowering.h:204
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:190
llvm::CCValAssign::Trunc
@ Trunc
Definition: CallingConvLower.h:45
llvm::MachineFrameInfo::CreateVariableSizedObject
int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
Definition: MachineFrameInfo.cpp:74
llvm::AArch64ISD::TLSDESC_CALLSEQ
@ TLSDESC_CALLSEQ
Definition: AArch64ISelLowering.h:74
llvm::TargetLoweringBase::emitPatchPoint
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
Definition: TargetLoweringBase.cpp:1165
llvm::CallBase::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1536
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
performVectorAddSubExtCombine
static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17533
SelectionDAG.h
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::AArch64ISD::RDSVL
@ RDSVL
Definition: AArch64ISelLowering.h:430
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:239
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:470
llvm::AArch64ISD::RHADDS_PRED
@ RHADDS_PRED
Definition: AArch64ISelLowering.h:110
llvm::MVT::nxv8i16
@ nxv8i16
Definition: MachineValueType.h:225
llvm::AArch64ISD::LD1x4post
@ LD1x4post
Definition: AArch64ISelLowering.h:456
llvm::TargetLoweringBase::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Definition: TargetLowering.h:763
llvm::TargetLoweringBase::MaxGluedStoresPerMemcpy
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
Definition: TargetLowering.h:3459
llvm::ISD::STRICT_FLOG
@ STRICT_FLOG
Definition: ISDOpcodes.h:418
Use.h
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::getOffset
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
Definition: RuntimeDyld.cpp:172
llvm::ISD::STRICT_FP_TO_UINT
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:442
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
OP_VEXT1
@ OP_VEXT1
Definition: ARMISelLowering.cpp:8317
llvm::GlobalValue::hasExternalWeakLinkage
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:524
llvm::AArch64::SMEMatrixTileH
@ SMEMatrixTileH
Definition: AArch64InstrInfo.h:589
llvm::AArch64TargetLowering::isAllActivePredicate
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
Definition: AArch64ISelLowering.cpp:24248
llvm::AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO
@ GLD1S_UXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:386
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1445
llvm::TypeSize::Fixed
static constexpr TypeSize Fixed(ScalarTy ExactSize)
Definition: TypeSize.h:331
llvm::MachineFrameInfo::setObjectAlignment
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
Definition: MachineFrameInfo.h:497
isSingletonEXTMask
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
Definition: AArch64ISelLowering.cpp:10623
llvm::ISD::SMAX
@ SMAX
Definition: ISDOpcodes.h:661
llvm::TargetLoweringBase::setIndexedStoreAction
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
Definition: TargetLowering.h:2439
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:481
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
llvm::AArch64ISD::SHL_PRED
@ SHL_PRED
Definition: AArch64ISelLowering.h:113
llvm::MaskedScatterSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2885
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ISD::ATOMIC_LOAD_OR
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1191
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:911
im
#define im(i)
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
llvm::AArch64ISD::FCMP
@ FCMP
Definition: AArch64ISelLowering.h:166
llvm::CallingConv::AArch64_SVE_VectorCall
@ AArch64_SVE_VectorCall
Used between AArch64 SVE functions.
Definition: CallingConv.h:221
llvm::ISD::ADDCARRY
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
combineSVEPrefetchVecBaseImmOff
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, unsigned ScalarSizeInBytes)
Combines a node carrying the intrinsic aarch64_sve_prf<T>_gather_scalar_offset into a node that uses ...
Definition: AArch64ISelLowering.cpp:21345
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AArch64ISD::SUNPKLO
@ SUNPKLO
Definition: AArch64ISelLowering.h:319
LowerSVEIntrinsicIndex
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17906
llvm::objcarc::getAttachedARCFunction
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition: ObjCARCUtil.h:43
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1000
llvm::SDNode::getVTList
SDVTList getVTList() const
Definition: SelectionDAGNodes.h:949
llvm::Triple::isWindowsMSVCEnvironment
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:593
llvm::AArch64II::MO_TLS
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
Definition: AArch64BaseInfo.h:753
MachineRegisterInfo.h
llvm::EVT::changeTypeToInteger
EVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:113
llvm::AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU
@ SIGN_EXTEND_INREG_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:146
llvm::SMEAttrs
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
Definition: AArch64SMEAttributes.h:24
performReinterpretCastCombine
static SDValue performReinterpretCastCombine(SDNode *N)
Definition: AArch64ISelLowering.cpp:16390
KnownBits.h
llvm::ComplexDeinterleavingOperation
ComplexDeinterleavingOperation
Definition: ComplexDeinterleavingPass.h:36
llvm::ShuffleVectorSDNode
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
Definition: SelectionDAGNodes.h:1528
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::AArch64ISD::LD4LANEpost
@ LD4LANEpost
Definition: AArch64ISelLowering.h:467
tryWidenMaskForShuffle
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:11395
llvm::MVT::isScalableVector
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
Definition: MachineValueType.h:399
convertToScalableVector
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
Definition: AArch64ISelLowering.cpp:23291
llvm::AArch64ISD::LD3DUPpost
@ LD3DUPpost
Definition: AArch64ISelLowering.h:462
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2147
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::ComputeValueVTs
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:121
getReductionSDNode
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:13230
llvm::TargetLoweringBase::setIndexedLoadAction
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Definition: TargetLowering.h:2422
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
performSTORECombine
static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:19475
llvm::yaml::isInteger
static bool isInteger(StringRef Val)
Definition: ELFYAML.cpp:1528
llvm::AArch64ISD::ADDlow
@ ADDlow
Definition: AArch64ISelLowering.h:77
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
llvm::AArch64ISD::STRICT_FCMP
@ STRICT_FCMP
Definition: AArch64ISelLowering.h:444
llvm::ISD::STRICT_FLOG2
@ STRICT_FLOG2
Definition: ISDOpcodes.h:420
llvm::AArch64ISD::GLD1_MERGE_ZERO
@ GLD1_MERGE_ZERO
Definition: AArch64ISelLowering.h:373
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition: ISDOpcodes.h:427
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:2005
performSelectCombine
static SDValue performSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with the compare-mask instruct...
Definition: AArch64ISelLowering.cpp:20783
llvm::EVT::is256BitVector
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:190
llvm::MVT::nxv8bf16
@ nxv8bf16
Definition: MachineValueType.h:256
MachineValueType.h
OP_VDUP2
@ OP_VDUP2
Definition: ARMISelLowering.cpp:8315
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::ISD::BRIND
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:986
llvm::Reloc::Model
Model
Definition: CodeGen.h:25
llvm::ISD::AVGFLOORU
@ AVGFLOORU
Definition: ISDOpcodes.h:644
llvm::ISD::ROTL
@ ROTL
Definition: ISDOpcodes.h:694
llvm::MaskedLoadSDNode::getPassThru
const SDValue & getPassThru() const
Definition: SelectionDAGNodes.h:2677
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::AArch64ISD::RESTORE_ZA
@ RESTORE_ZA
Definition: AArch64ISelLowering.h:70
SetCCInfoAndKind::IsAArch64
bool IsAArch64
Definition: AArch64ISelLowering.cpp:17122
PerfectShuffleTable
static const unsigned PerfectShuffleTable[6561+1]
Definition: AArch64PerfectShuffle.h:25
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MaskedGatherSDNode::getPassThru
const SDValue & getPassThru() const
Definition: SelectionDAGNodes.h:2857
llvm::AArch64ISD::MVNIshift
@ MVNIshift
Definition: AArch64ISelLowering.h:185
llvm::TargetLowering::TargetLoweringOpt::CombineTo
bool CombineTo(SDValue O, SDValue N)
Definition: TargetLowering.h:3668
performLOADCombine
static SDValue performLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:19395
llvm::AArch64ISD::REVD_MERGE_PASSTHRU
@ REVD_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:431
llvm::classifyEHPersonality
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Definition: EHPersonalities.cpp:22
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2511
llvm::APInt::setHighBits
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1370
llvm::ISD::VECREDUCE_UMAX
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1291
performFlagSettingCombine
static SDValue performFlagSettingCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned GenericOpcode)
Definition: AArch64ISelLowering.cpp:20463
llvm::AArch64TargetLowering::isVScaleKnownToBeAPowerOfTwo
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
Definition: AArch64ISelLowering.cpp:6148
llvm::MVT::v4bf16
@ v4bf16
Definition: MachineValueType.h:160
llvm::AArch64ISD::SST1_UXTW_PRED
@ SST1_UXTW_PRED
Definition: AArch64ISelLowering.h:419
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::AArch64ISD::FCMLTz
@ FCMLTz
Definition: AArch64ISelLowering.h:245
llvm::AArch64FunctionInfo::setHasSwiftAsyncContext
void setHasSwiftAsyncContext(bool HasContext)
Definition: AArch64MachineFunctionInfo.h:437
performDupLane128Combine
static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:21513
performNEONPostLDSTCombine
static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Target-specific DAG combine function for NEON load/store intrinsics to merge base address updates.
Definition: AArch64ISelLowering.cpp:19719
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1525
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:772
llvm::SMEAttrs::ZA_Shared
@ ZA_Shared
Definition: AArch64SMEAttributes.h:34
llvm::TargetLoweringBase::preferredShiftLegalizationStrategy
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
Definition: TargetLowering.h:932
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:929
llvm::TargetLoweringBase::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition: TargetLowering.h:2627
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Instruction.h
llvm::AArch64ISD::CMEQz
@ CMEQz
Definition: AArch64ISelLowering.h:236
CommandLine.h
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1414
analyzeCallOperands
static void analyzeCallOperands(const AArch64TargetLowering &TLI, const AArch64Subtarget *Subtarget, const TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo)
Definition: AArch64ISelLowering.cpp:6881
tryCombineToBSL
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64TargetLowering &TLI)
Definition: AArch64ISelLowering.cpp:16122
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:568
llvm::SelectionDAG::getVScale
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
Definition: SelectionDAG.h:1062
llvm::ISD::STRICT_FDIV
@ STRICT_FDIV
Definition: ISDOpcodes.h:403
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1007
llvm::TargetLowering::TargetLoweringOpt::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3655
ReplaceAddWithADDP
static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:22033
llvm::MachineFunction::ArgRegPair
Structure used to represent pair of argument number after call lowering and register used to transfer...
Definition: MachineFunction.h:436
llvm::TargetLoweringBase::AtomicExpansionKind::Expand
@ Expand
tryToWidenSetCCOperands
static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20348
llvm::AArch64TargetLowering::optimizeExtendOrTruncateConversion
bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L) const override
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
Definition: AArch64ISelLowering.cpp:14220
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:246
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:8118
performLD1ReplicateCombine
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18699
isZerosVector
static bool isZerosVector(const SDNode *N)
isZerosVector - Check whether SDNode N is a zero-filled vector.
Definition: AArch64ISelLowering.cpp:2866
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:168
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:644
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:362
llvm::TargetLowering::CallLoweringInfo::IsVarArg
bool IsVarArg
Definition: TargetLowering.h:4203
llvm::APInt::isNonNegative
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:317
llvm::MVT::v8f16
@ v8f16
Definition: MachineValueType.h:150
llvm::AArch64ISD::FMUL_PRED
@ FMUL_PRED
Definition: AArch64ISelLowering.h:103
llvm::AArch64_AM::encodeAdvSIMDModImmType6
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
Definition: AArch64AddressingModes.h:534
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:682
llvm::AArch64SelectionDAGInfo
Definition: AArch64SelectionDAGInfo.h:20
llvm::SelectionDAG::getMaskedStore
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
Definition: SelectionDAG.cpp:8955
llvm::ISD::STRICT_FP_TO_SINT
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:441
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:37
llvm::AArch64ISD::REVW_MERGE_PASSTHRU
@ REVW_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:345
llvm::AArch64ISD::FRINT_MERGE_PASSTHRU
@ FRINT_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:135
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1187
llvm::AArch64Subtarget::getPrefLoopAlignment
Align getPrefLoopAlignment() const
Definition: AArch64Subtarget.h:247
llvm::TargetLowering::isTargetCanonicalConstantNode
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
Definition: TargetLowering.h:3929
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
llvm::AArch64II::MO_G0
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
Definition: AArch64BaseInfo.h:727
llvm::AArch64ISD::CSEL
@ CSEL
Definition: AArch64ISelLowering.h:82
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:728
llvm::Log2_64
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:388
llvm::TargetLowering::CallLoweringInfo::Args
ArgListTy Args
Definition: TargetLowering.h:4222
llvm::MOStridedAccess
static const MachineMemOperand::Flags MOStridedAccess
Definition: AArch64InstrInfo.h:30
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
GlobalValue.h
isZIPMask
static bool isZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition: AArch64ISelLowering.cpp:10846
llvm::AArch64TargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: AArch64ISelLowering.cpp:21548
performVecReduceAddCombineWithUADDLP
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15123
llvm::MVT::nxv2bf16
@ nxv2bf16
Definition: MachineValueType.h:254
llvm::TargetLoweringBase::shouldLocalize
virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const
Check whether or not MI needs to be moved close to its uses.
Definition: TargetLoweringBase.cpp:2327
isConstantSplatVectorMaskForType
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT)
Definition: AArch64ISelLowering.cpp:16316
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::AArch64ISD::MVNImsl
@ MVNImsl
Definition: AArch64ISelLowering.h:186
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1141
performFDivCombine
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point divide by power of two into fixed-point to floating-point conversion.
Definition: AArch64ISelLowering.cpp:15991
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1502
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:31
TargetMachine.h
llvm::AArch64TargetLowering::isFMAFasterThanFMulAndFAdd
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
Definition: AArch64ISelLowering.cpp:14921
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:702
llvm::AArch64ISD::ST4LANEpost
@ ST4LANEpost
Definition: AArch64ISelLowering.h:470
llvm::AArch64CC::VC
@ VC
Definition: AArch64BaseInfo.h:262
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::AArch64ISD::TRN1
@ TRN1
Definition: AArch64ISelLowering.h:201
llvm::CC_AArch64_Arm64EC_VarArg
bool CC_AArch64_Arm64EC_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::PGSOQueryType::Test
@ Test
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
llvm::AArch64ISD::SST1_SCALED_PRED
@ SST1_SCALED_PRED
Definition: AArch64ISelLowering.h:418
llvm::AArch64ISD::CMGEz
@ CMGEz
Definition: AArch64ISelLowering.h:237
SelectionDAGNodes.h
llvm::AArch64ISD::SMAXV
@ SMAXV
Definition: AArch64ISelLowering.h:266
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:713
llvm::CallingConv::Swift
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1648
Constants.h
llvm::AArch64FunctionInfo::getBytesInStackArgArea
unsigned getBytesInStackArgArea() const
Definition: AArch64MachineFunctionInfo.h:211
llvm::AArch64ISD::SRL_PRED
@ SRL_PRED
Definition: AArch64ISelLowering.h:117
llvm::Failed
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::SDNode::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this node.
Definition: SelectionDAGNodes.h:718
isNegatedInteger
static bool isNegatedInteger(SDValue Op)
Definition: AArch64ISelLowering.cpp:17368
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition: ISDOpcodes.h:421
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
llvm::AArch64ISD::MOPS_MEMMOVE
@ MOPS_MEMMOVE
Definition: AArch64ISelLowering.h:488
llvm::AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition: AArch64ISelLowering.cpp:22777
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::SelectionDAG::getBoolExtOrTrunc
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
Definition: SelectionDAG.cpp:1452
llvm::AArch64TargetLowering::lowerInterleavedStore
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a stN intrinsic.
Definition: AArch64ISelLowering.cpp:14575
llvm::AArch64ISD::GLDNT1S_MERGE_ZERO
@ GLDNT1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:411
llvm::AArch64ISD::LD1_MERGE_ZERO
@ LD1_MERGE_ZERO
Definition: AArch64ISelLowering.h:358
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3945
llvm::SelectionDAG::getTargetBlockAddress
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:764
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:674
llvm::AArch64ISD::RET_FLAG
@ RET_FLAG
Definition: AArch64ISelLowering.h:80
llvm::AArch64FunctionInfo::setVarArgsFPRSize
void setVarArgsFPRSize(unsigned Size)
Definition: AArch64MachineFunctionInfo.h:355
llvm::AArch64ISD::MRS
@ MRS
Definition: AArch64ISelLowering.h:305
InlinePriorityMode::Cost
@ Cost
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1456
llvm::AArch64_AM::isLogicalImmediate
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
Definition: AArch64AddressingModes.h:276
llvm::CC_AArch64_DarwinPCS
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::AArch64Subtarget::isLittleEndian
bool isLittleEndian() const
Definition: AArch64Subtarget.h:259
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:8220
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:126
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:770
Operation
PowerPC Reduce CR logical Operation
Definition: PPCReduceCRLogicals.cpp:735
llvm::AArch64ISD::GLD1S_SCALED_MERGE_ZERO
@ GLD1S_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:383
llvm::AArch64ISD::LDNP
@ LDNP
Definition: AArch64ISelLowering.h:479
llvm::User
Definition: User.h:44
llvm::AArch64_AM::isAdvSIMDModImmType5
static bool isAdvSIMDModImmType5(uint64_t Imm)
Definition: AArch64AddressingModes.h:512
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:1057
llvm::AArch64Subtarget::getMaximumJumpTableSize
unsigned getMaximumJumpTableSize() const
Definition: AArch64Subtarget.h:253
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
llvm::ISD::CATCHRET
@ CATCHRET
CATCHRET - Represents a return from a catch block funclet.
Definition: ISDOpcodes.h:1043
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:781
isSplatShuffle
static bool isSplatShuffle(Value *V)
Definition: AArch64ISelLowering.cpp:13771
llvm::TargetLoweringBase::setOperationPromotedToType
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
Definition: TargetLowering.h:2503
llvm::SelectionDAG::getTargetLoweringInfo
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:474
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:422
llvm::AArch64Subtarget::isTargetILP32
bool isTargetILP32() const
Definition: AArch64Subtarget.h:273
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
findMoreOptimalIndexType
static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N, SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:19611
Intrinsics.h
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2598
llvm::Type::getDoubleTy
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:229
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MVT::v1f16
@ v1f16
Definition: MachineValueType.h:146
llvm::AArch64FunctionInfo::setIsSVECC
void setIsSVECC(bool s)
Definition: AArch64MachineFunctionInfo.h:204
llvm::AArch64ISD::ORRi
@ ORRi
Definition: AArch64ISelLowering.h:190
getPackedSVEVectorVT
static EVT getPackedSVEVectorVT(EVT VT)
Definition: AArch64ISelLowering.cpp:155
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::AArch64ISD::VSRI
@ VSRI
Definition: AArch64ISelLowering.h:223
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:58
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1540
llvm::AArch64ISD::LASTB
@ LASTB
Definition: AArch64ISelLowering.h:326
Twine.h
llvm::SDNode::isStrictFPOpcode
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
Definition: SelectionDAGNodes.h:681
isUZP_v_undef_Mask
static bool isUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of "vector_shuffle v,...
Definition: AArch64ISelLowering.cpp:10910
llvm::EVT::is64BitVector
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:180
llvm::JumpTableSDNode
Definition: SelectionDAGNodes.h:1866
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:286
llvm::TargetLoweringBase::setMaxBytesForAlignment
void setMaxBytesForAlignment(unsigned MaxBytes)
Definition: TargetLowering.h:2533
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
isIntImmediate
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
Definition: AArch64ISelLowering.cpp:1987
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3515
getSVEContainerType
static MVT getSVEContainerType(EVT ContentTy)
Definition: AArch64ISelLowering.cpp:18619
llvm::MVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: MachineValueType.h:1144
llvm::AArch64ISD::SADDLP
@ SADDLP
Definition: AArch64ISelLowering.h:255
llvm::CC_AArch64_WebKit_JS
bool CC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::AArch64ISD::SRA_PRED
@ SRA_PRED
Definition: AArch64ISelLowering.h:116
llvm::VectorType::getInteger
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
Definition: DerivedTypes.h:440
llvm::AArch64ISD::UDIV_PRED
@ UDIV_PRED
Definition: AArch64ISelLowering.h:118
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
emitConditionalComparison
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
Definition: AArch64ISelLowering.cpp:3184
llvm::SelectionDAG::MaskedValueIsZero
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
Definition: SelectionDAG.cpp:2559
llvm::AArch64FunctionInfo::setBytesInStackArgArea
void setBytesInStackArgArea(unsigned bytes)
Definition: AArch64MachineFunctionInfo.h:212
llvm::TargetTransformInfo::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
Definition: TargetTransformInfo.cpp:618
SI
@ SI
Definition: SIInstrInfo.cpp:7993
llvm::AArch64_AM::isAdvSIMDModImmType2
static bool isAdvSIMDModImmType2(uint64_t Imm)
Definition: AArch64AddressingModes.h:467
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:927
llvm::CodeGenOpt::Aggressive
@ Aggressive
-O3
Definition: CodeGen.h:61
performTruncateCombine
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17611
llvm::AArch64ISD::MUL_PRED
@ MUL_PRED
Definition: AArch64ISelLowering.h:107
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3933
llvm::CCState::AnalyzeReturn
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
Definition: CallingConvLower.cpp:110
performSVESpliceCombine
static SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:21433
WidenVector
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
Definition: AArch64ISelLowering.cpp:10302
llvm::StringRef::getAsInteger
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:469
llvm::AArch64ISD::CMLEz
@ CMLEz
Definition: AArch64ISelLowering.h:239
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:341
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::ISD::STRICT_FLOG10
@ STRICT_FLOG10
Definition: ISDOpcodes.h:419
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:234
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
areExtractShuffleVectors
static bool areExtractShuffleVectors(Value *Op1, Value *Op2, bool AllowSplat=false)
Check if both Op1 and Op2 are shufflevector extracts of either the lower or upper half of the vector ...
Definition: AArch64ISelLowering.cpp:13779
llvm::AArch64TargetLowering::isComplexDeinterleavingOperationSupported
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
Definition: AArch64ISelLowering.cpp:24336
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:931
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:486
llvm::isIntOrFPConstant
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
Definition: SelectionDAGNodes.h:1759
UINT64_MAX
#define UINT64_MAX
Definition: DataTypes.h:77
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:258
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1945
isWideTypeMask
static bool isWideTypeMask(ArrayRef< int > M, EVT VT, SmallVectorImpl< int > &NewMask)
Definition: AArch64ISelLowering.cpp:11350
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::AArch64ISD::OBSCURE_COPY
@ OBSCURE_COPY
Definition: AArch64ISelLowering.h:67
removeRedundantInsertVectorElt
static SDValue removeRedundantInsertVectorElt(SDNode *N)
Definition: AArch64ISelLowering.cpp:21389
llvm::MVT::v16i1
@ v16i1
Definition: MachineValueType.h:70
llvm::Mips::GPRIdx
@ GPRIdx
Definition: MipsRegisterBankInfo.cpp:44
llvm::MVT::nxv2f16
@ nxv2f16
Definition: MachineValueType.h:247
llvm::AArch64ISD::FRSQRTE
@ FRSQRTE
Definition: AArch64ISelLowering.h:315
llvm::AArch64CC::HS
@ HS
Definition: AArch64BaseInfo.h:257
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::MachinePointerInfo::getGOT
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
Definition: MachineOperand.cpp:1058
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:550
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2356
llvm::AArch64TargetLowering::getIRStackGuard
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
Definition: AArch64ISelLowering.cpp:22913
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::AArch64TargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
Definition: AArch64ISelLowering.cpp:2259
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1629
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:382
llvm::TargetLoweringBase::MaxLoadsPerMemcmpOptSize
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3474
llvm::ShuffleVectorSDNode::isSplat
bool isSplat() const
Definition: SelectionDAGNodes.h:1550
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Function::arg_end
arg_iterator arg_end()
Definition: Function.h:775
llvm::isUIntN
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:256
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::MVT::v8i1
@ v8i1
Definition: MachineValueType.h:69
llvm::MVT::v4f64
@ v4f64
Definition: MachineValueType.h:192
llvm::AArch64TargetLowering::isMulAddWithConstProfitable
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
Definition: AArch64ISelLowering.cpp:14825
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition: SelectionDAGNodes.h:806
llvm::AArch64ISD::MOVIedit
@ MOVIedit
Definition: AArch64ISelLowering.h:182
llvm::AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
@ SINT_TO_FP_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:143
llvm::AArch64TargetLowering::isExtractSubvectorCheap
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
Definition: AArch64ISelLowering.cpp:15073
llvm::ISD::ATOMIC_LOAD_AND
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1189
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:391
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:467
llvm::MaskedLoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2674
llvm::AArch64TargetLowering::getTargetMMOFlags
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
Definition: AArch64ISelLowering.cpp:14321
llvm::AArch64ISD::SRAD_MERGE_OP1
@ SRAD_MERGE_OP1
Definition: AArch64ISelLowering.h:125
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
llvm::MaskedStoreSDNode::getMask
const SDValue & getMask() const
Definition: SelectionDAGNodes.h:2713
llvm::Instruction
Definition: Instruction.h:41
llvm::AArch64ISD::UADDLP
@ UADDLP
Definition: AArch64ISelLowering.h:256
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
llvm::RetCC_AArch64_AAPCS
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Concat
static constexpr int Concat[]
Definition: X86InterleavedAccess.cpp:239
llvm::CC_AArch64_AAPCS
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:188
tryCombineToEXTR
static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
EXTR instruction extracts a contiguous chunk of bits from two existing registers viewed as a high/low...
Definition: AArch64ISelLowering.cpp:16082
ShuffleOps
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
Definition: InstCombineVectorOps.cpp:763
llvm::AArch64ISD::ST1_PRED
@ ST1_PRED
Definition: AArch64ISelLowering.h:414
llvm::AttributeList::hasFnAttr
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
Definition: Attributes.cpp:1479
legalizeSVEGatherPrefetchOffsVec
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG)
Legalize the gather prefetch (scalar + vector addressing mode) when the offset vector is an unpacked ...
Definition: AArch64ISelLowering.cpp:21322
llvm::AArch64ISD::STZ2G
@ STZ2G
Definition: AArch64ISelLowering.h:475
llvm::ShuffleVectorSDNode::getMask
ArrayRef< int > getMask() const
Definition: SelectionDAGNodes.h:1540
llvm::AArch64ISD::UMINV_PRED
@ UMINV_PRED
Definition: AArch64ISelLowering.h:274
checkValueWidth
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
Definition: AArch64ISelLowering.cpp:19858
llvm::AArch64ISD::GLD1S_UXTW_MERGE_ZERO
@ GLD1S_UXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:384
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::AArch64ISD::DUPLANE16
@ DUPLANE16
Definition: AArch64ISelLowering.h:174
llvm::AArch64ISD::MRRS
@ MRRS
Definition: AArch64ISelLowering.h:439
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:773
llvm::IRBuilderBase::getInt8Ty
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:502
checkZExtBool
static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:7120
llvm::AArch64ISD::SST1_PRED
@ SST1_PRED
Definition: AArch64ISelLowering.h:417
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1494
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:740
llvm::ShuffleVectorInst::isExtractSubvectorMask
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
Definition: Instructions.cpp:2434
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::MVT::v16f16
@ v16f16
Definition: MachineValueType.h:151
llvm::MachineFrameInfo::getStackID
uint8_t getStackID(int ObjectIdx) const
Definition: MachineFrameInfo.h:731
llvm::TargetLoweringBase::getSDagStackGuard
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: TargetLoweringBase.cpp:1987
llvm::MaskedStoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2710
llvm::AArch64FunctionInfo::incNumLocalDynamicTLSAccesses
void incNumLocalDynamicTLSAccesses()
Definition: AArch64MachineFunctionInfo.h:331
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:279
APFloat.h
llvm::MachineFrameInfo::SSPLK_None
@ SSPLK_None
Did not trigger a stack protector.
Definition: MachineFrameInfo.h:111
llvm::MVT::nxv4i8
@ nxv4i8
Definition: MachineValueType.h:216
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:926
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:925
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1165
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:268
llvm::MVT::nxv4f32
@ nxv4f32
Definition: MachineValueType.h:262
OperandTraits.h
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1740
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1789
llvm::AArch64ISD::ZIP2
@ ZIP2
Definition: AArch64ISelLowering.h:198
llvm::pdb::PDB_LocType::TLS
@ TLS
performExtractSubvectorCombine
static SDValue performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:16919
llvm::AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO
@ GLDFF1S_SXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:403
ReplaceCMP_SWAP_128Results
static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:22155
llvm::bitc::NoNaNs
@ NoNaNs
Definition: LLVMBitCodes.h:499
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:887
llvm::AArch64FunctionInfo::setSRetReturnReg
void setSRetReturnReg(unsigned Reg)
Definition: AArch64MachineFunctionInfo.h:358
llvm::AArch64FunctionInfo::getVarArgsStackIndex
int getVarArgsStackIndex() const
Definition: AArch64MachineFunctionInfo.h:339
performLastTrueTestVectorCombine
static SDValue performLastTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:16617
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7893
llvm::TargetLowering::C_Immediate
@ C_Immediate
Definition: TargetLowering.h:4625
isOpcWithIntImmediate
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
Definition: AArch64ISelLowering.cpp:1998
llvm::AArch64ISD::FCMEQ
@ FCMEQ
Definition: AArch64ISelLowering.h:231
INT64_MAX
#define INT64_MAX
Definition: DataTypes.h:71
llvm::AArch64CC::PL
@ PL
Definition: AArch64BaseInfo.h:260
llvm::AArch64ISD::VSLI
@ VSLI
Definition: AArch64ISelLowering.h:222
EmitVectorComparison
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, const SDLoc &dl, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:13060
DebugLoc.h
llvm::MachineFrameInfo::hasMustTailInVarArgFunc
bool hasMustTailInVarArgFunc() const
Returns true if the function is variadic and contains a musttail call.
Definition: MachineFrameInfo.h:634
lowerADDSUBCARRY
static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode, bool IsSigned)
Definition: AArch64ISelLowering.cpp:3796
isVShiftLImm
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
Definition: AArch64ISelLowering.cpp:12957
llvm::EVT::changeVectorElementType
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
UseTlsOffset
static Value * UseTlsOffset(IRBuilderBase &IRB, unsigned Offset)
Definition: AArch64ISelLowering.cpp:22903
llvm::SelectionDAG::SplitVectorOperand
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
Definition: SelectionDAG.h:2214
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:526
llvm::AArch64ISD::FABS_MERGE_PASSTHRU
@ FABS_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:129
llvm::TargetLoweringBase::MaxStoresPerMemcpy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
Definition: TargetLowering.h:3451
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::AArch64ISD::STILP
@ STILP
Definition: AArch64ISelLowering.h:481
tryAdvSIMDModImm64
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition: AArch64ISelLowering.cpp:11771
llvm::AArch64ISD::FP_ROUND_MERGE_PASSTHRU
@ FP_ROUND_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:140
llvm::BitVector
Definition: BitVector.h:75
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:132
llvm::ISD::SMULO
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
llvm::APInt::sle
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1144
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1440
overflowFlagToValue
static SDValue overflowFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3785
llvm::ISD::ABDS
@ ABDS
Definition: ISDOpcodes.h:655
llvm::MemSDNode::isNonTemporal
bool isNonTemporal() const
Definition: SelectionDAGNodes.h:1315
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::ISD::SPLAT_VECTOR
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:613
PatternMatch.h
llvm::TargetLoweringBase::insertSSPDeclarations
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: TargetLoweringBase.cpp:1971
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1775
llvm::APInt::countTrailingZeros
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1591
llvm::AArch64Subtarget::getProcFamily
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
Definition: AArch64Subtarget.h:198
llvm::ISD::EntryToken
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
llvm::ISD::FSINCOS
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:959
llvm::TargetLoweringBase::TypeWidenVector
@ TypeWidenVector
Definition: TargetLowering.h:214
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:127
llvm::MaskedLoadStoreSDNode::isUnindexed
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2649
llvm::AArch64TargetLowering::hasPairedLoad
bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
Definition: AArch64ISelLowering.cpp:14299
llvm::ConstantSDNode::isOne
bool isOne() const
Definition: SelectionDAGNodes.h:1609
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1383
llvm::AArch64ISD::VSHL
@ VSHL
Definition: AArch64ISelLowering.h:210
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:686
llvm::AArch64ISD::SETCC_MERGE_ZERO
@ SETCC_MERGE_ZERO
Definition: AArch64ISelLowering.h:151
llvm::CallInst::isTailCall
bool isTailCall() const
Definition: Instructions.h:1677
llvm::MVT::v1i64
@ v1i64
Definition: MachineValueType.h:130
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition: NVPTX.h:123
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2304
foldVectorXorShiftIntoCmp
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X,...
Definition: AArch64ISelLowering.cpp:15085
performMulCombine
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:15657
llvm::AArch64II::MO_DLLIMPORTAUX
@ MO_DLLIMPORTAUX
MO_DLLIMPORTAUX - Symbol refers to "auxilliary" import stub.
Definition: AArch64BaseInfo.h:781
llvm::AArch64ISD::UUNPKLO
@ UUNPKLO
Definition: AArch64ISelLowering.h:321
llvm::AArch64ISD::SBC
@ SBC
Definition: AArch64ISelLowering.h:91
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:732
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1153
llvm::AArch64_AM::getFP32Imm
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
Definition: AArch64AddressingModes.h:394
llvm::AArch64ISD::SMSTART
@ SMSTART
Definition: AArch64ISelLowering.h:68
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
llvm::SDValue::getValueSizeInBits
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
Definition: SelectionDAGNodes.h:199
llvm::AArch64ISD::SADDV_PRED
@ SADDV_PRED
Definition: AArch64ISelLowering.h:269
llvm::AArch64::SMEMatrixTileS
@ SMEMatrixTileS
Definition: AArch64InstrInfo.h:590
llvm::isIntN
bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:261
ReplaceATOMIC_LOAD_128Results
static void ReplaceATOMIC_LOAD_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:22317
splitStores
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:18898
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:272
llvm::AArch64ISD::UITOF
@ UITOF
Definition: AArch64ISelLowering.h:296
llvm::AArch64::getGPRArgRegs
const ArrayRef< MCPhysReg > getGPRArgRegs()
Definition: AArch64ISelLowering.cpp:151
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:100
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:190
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:120
llvm::AArch64TargetLowering::shouldExpandAtomicRMWInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Definition: AArch64ISelLowering.cpp:22728
isZIP_v_undef_Mask
static bool isZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of "vector_shuffle v,...
Definition: AArch64ISelLowering.cpp:10891
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:328
llvm::MemSDNode::isVolatile
bool isVolatile() const
Definition: SelectionDAGNodes.h:1314
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:87
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2063
llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2392
Type.h
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1441
llvm::MVT::nxv4i16
@ nxv4i16
Definition: MachineValueType.h:224
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::Triple::isOSMSVCRT
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:626
llvm::AArch64TargetLowering::getOptimalMemOpType
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
Definition: AArch64ISelLowering.cpp:14746
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::AArch64TargetLowering::lowerInterleavedLoad
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a ldN intrinsic.
Definition: AArch64ISelLowering.cpp:14402
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1446
llvm::MVT::getScalarType
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Definition: MachineValueType.h:544
llvm::IRBuilderBase::CreatePointerCast
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2054
llvm::AArch64ISD::UQSHL_I
@ UQSHL_I
Definition: AArch64ISelLowering.h:216
Int
@ Int
Definition: TargetLibraryInfo.cpp:50
llvm::AArch64TargetLowering::EmitFill
MachineBasicBlock * EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const
Definition: AArch64ISelLowering.cpp:2716
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:717
llvm::AArch64_AM::isAdvSIMDModImmType9
static bool isAdvSIMDModImmType9(uint64_t Imm)
Definition: AArch64AddressingModes.h:574
performSVEAndCombine
static SDValue performSVEAndCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: AArch64ISelLowering.cpp:16401
llvm::StringRef::slice
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:672
llvm::AVRISD::TST
@ TST
Test for zero or minus instruction.
Definition: AVRISelLowering.h:70
llvm::AArch64ISD::PTEST
@ PTEST
Definition: AArch64ISelLowering.h:338
LoopInfo.h
changeIntCCToAArch64CC
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC)
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
Definition: AArch64ISelLowering.cpp:2883
LowerTruncateVectorStore
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, EVT MemVT, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:5603
llvm::AArch64ISD::HADDS_PRED
@ HADDS_PRED
Definition: AArch64ISelLowering.h:105
splitInt128
static std::pair< SDValue, SDValue > splitInt128(SDValue N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:22094
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1492
isOperandOfVmullHighP64
static bool isOperandOfVmullHighP64(Value *Op)
Check if Op could be used with vmull_high_p64 intrinsic.
Definition: AArch64ISelLowering.cpp:13851
llvm::ComplexDeinterleavingRotation::Rotation_270
@ Rotation_270
llvm::AArch64FunctionInfo::setVarArgsFPRIndex
void setVarArgsFPRIndex(int Index)
Definition: AArch64MachineFunctionInfo.h:352
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::ComplexDeinterleavingRotation
ComplexDeinterleavingRotation
Definition: ComplexDeinterleavingPass.h:44
llvm::APInt::ashr
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:815
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: MachineValueType.h:366
llvm::ISD::POST_INC
@ POST_INC
Definition: ISDOpcodes.h:1383
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:313
uint64_t
Class for arbitrary precision integers APInt is a functional replacement for common case unsigned integer type like unsigned long or uint64_t
Definition: tmp.txt:1
llvm::all_equal
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition: STLExtras.h:1986
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:154
llvm::TargetLoweringBase::getMemValueType
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Definition: TargetLowering.h:1520
getPredicateForScalableVector
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
Definition: AArch64ISelLowering.cpp:23275
findEXTRHalf
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, bool &FromHi)
An EXTR instruction is made up of two shifts, ORed together.
Definition: AArch64ISelLowering.cpp:16060
AArch64AddressingModes.h
llvm::MVT::f80
@ f80
Definition: MachineValueType.h:59
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:917
foldADCToCINC
static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17513
llvm::VTSDNode::getVT
EVT getVT() const
Definition: SelectionDAGNodes.h:2307
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:222
llvm::TargetLoweringBase::PredictableSelectIsExpensive
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
Definition: TargetLowering.h:3492
llvm::to_vector
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1298
NarrowVector
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
Definition: AArch64ISelLowering.cpp:10322
setInfoSVEStN
static bool setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI)
Set the IntrinsicInfo for the aarch64_sve_st<N> intrinsics.
Definition: AArch64ISelLowering.cpp:13440
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::AArch64FunctionInfo::setLazySaveTPIDR2Obj
void setLazySaveTPIDR2Obj(unsigned Reg)
Definition: AArch64MachineFunctionInfo.h:207
llvm::TargetRegisterInfo::regmaskSubsetEqual
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const
Return true if all bits that are set in mask mask0 are also set in mask1.
Definition: TargetRegisterInfo.cpp:492
createTblForTrunc
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian)
Definition: AArch64ISelLowering.cpp:14101
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:86
llvm::CallingConv::AArch64_VectorCall
@ AArch64_VectorCall
Used between AArch64 Advanced SIMD functions.
Definition: CallingConv.h:218
llvm::TargetLowering::TargetLoweringOpt
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Definition: TargetLowering.h:3654
llvm::ISD::BlockAddress
@ BlockAddress
Definition: ISDOpcodes.h:84
performVSelectCombine
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20713
llvm::DataLayout::getPrefTypeAlign
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:857
llvm::TargetLowering::CallLoweringInfo::Outs
SmallVector< ISD::OutputArg, 32 > Outs
Definition: TargetLowering.h:4226
llvm::ISD::AVGCEILU
@ AVGCEILU
Definition: ISDOpcodes.h:649
llvm::MVT::v4i64
@ v4i64
Definition: MachineValueType.h:133
llvm::AArch64TargetLowering::getPromotedVTForPredicate
EVT getPromotedVTForPredicate(EVT VT) const
Definition: AArch64ISelLowering.cpp:24253
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1450
llvm::AArch64Subtarget::forceStreamingCompatibleSVE
bool forceStreamingCompatibleSVE() const
Definition: AArch64Subtarget.cpp:476
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:476
areExtractExts
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
Definition: AArch64ISelLowering.cpp:13835
llvm::AArch64::SMEMatrixTileQ
@ SMEMatrixTileQ
Definition: AArch64InstrInfo.h:592
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2373
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
LowerPREFETCH
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3853
llvm::MVT::nxv16i8
@ nxv16i8
Definition: MachineValueType.h:218
llvm::ISD::STRICT_FMINIMUM
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:435
llvm::AArch64ISD::FCVTZU_MERGE_PASSTHRU
@ FCVTZU_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:144
llvm::TargetLowering::C_Other
@ C_Other
Definition: TargetLowering.h:4626
llvm::TargetLoweringBase::MaxStoresPerMemcpyOptSize
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3453
llvm::DataLayout::isBigEndian
bool isBigEndian() const
Definition: DataLayout.h:239
VectorUtils.h
llvm::TargetLowering::softenSetCCOperands
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
Definition: TargetLowering.cpp:289
llvm::AtomicRMWInst::UMin
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:752
llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:660
performUzpCombine
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:19028
llvm::ISD::WRITE_REGISTER
@ WRITE_REGISTER
Definition: ISDOpcodes.h:119
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:955
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:640
llvm::TargetLoweringBase::getMaximumJumpTableSize
unsigned getMaximumJumpTableSize() const
Return upper limit for number of entries in a jump table.
Definition: TargetLoweringBase.cpp:2007
llvm::cl::opt< bool >
llvm::MaskedScatterSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2887
llvm::AArch64ISD::GLD1_IMM_MERGE_ZERO
@ GLD1_IMM_MERGE_ZERO
Definition: AArch64ISelLowering.h:379
tryCombineCRC32
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17883
llvm::ISD::AVGCEILS
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:648
llvm::APFloat
Definition: APFloat.h:744
getCSETCondCode
static std::optional< AArch64CC::CondCode > getCSETCondCode(SDValue Op)
Definition: AArch64ISelLowering.cpp:17471
llvm::AArch64_AM::isAdvSIMDModImmType1
static bool isAdvSIMDModImmType1(uint64_t Imm)
Definition: AArch64AddressingModes.h:452
getPTest
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, AArch64CC::CondCode Cond)
Definition: AArch64ISelLowering.cpp:18026
llvm::AArch64ISD::SPLICE
@ SPLICE
Definition: AArch64ISelLowering.h:207
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:270
GenericSetCCInfo::CC
ISD::CondCode CC
Definition: AArch64ISelLowering.cpp:17102
performSignExtendSetCCCombine
static SDValue performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18511
isPredicateCCSettingOp
static bool isPredicateCCSettingOp(SDValue N)
Definition: AArch64ISelLowering.cpp:16566
llvm::TargetLoweringBase::getIRStackGuard
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
Definition: TargetLoweringBase.cpp:1957
llvm::CC_AArch64_DarwinPCS_ILP32_VarArg
bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::AArch64TargetLowering::preferredShiftLegalizationStrategy
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
Definition: AArch64ISelLowering.cpp:23005
llvm::SDValue::getNumOperands
unsigned getNumOperands() const
Definition: SelectionDAGNodes.h:1145
SetCCInfoAndKind::Info
SetCCInfo Info
Definition: AArch64ISelLowering.cpp:17121
llvm::AArch64TargetLowering::isIntDivCheap
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
Definition: AArch64ISelLowering.cpp:23061
llvm::isNullFPConstant
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
Definition: SelectionDAG.cpp:10922
llvm::ISD::SADDO
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
llvm::AArch64ISD::CMHI
@ CMHI
Definition: AArch64ISelLowering.h:229
llvm::TargetLoweringBase::getFrameIndexTy
MVT getFrameIndexTy(const DataLayout &DL) const
Return the type for frame index, which is determined by the alloca address space specified through th...
Definition: TargetLowering.h:378
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::AArch64ISD::ST3post
@ ST3post
Definition: AArch64ISelLowering.h:452
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:301
llvm::TargetLoweringBase::setMaxDivRemBitWidthSupported
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
Definition: TargetLowering.h:2552
llvm::peekThroughBitcasts
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
Definition: SelectionDAG.cpp:11003
isCMP
static bool isCMP(SDValue Op)
Definition: AArch64ISelLowering.cpp:17464
llvm::GlobalValue
Definition: GlobalValue.h:44
AArch64CallingConvention.h
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::AArch64TargetLowering::shouldExpandAtomicLoadInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
Definition: AArch64ISelLowering.cpp:22701
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
llvm::MVT::i64x8
@ i64x8
Definition: MachineValueType.h:293
llvm::AArch64ISD::SDOT
@ SDOT
Definition: AArch64ISelLowering.h:260
llvm::AArch64TargetLowering::AArch64TargetLowering
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
Definition: AArch64ISelLowering.cpp:327
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:89
getAArch64Cmp
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &dl)
Definition: AArch64ISelLowering.cpp:3453
unsigned
Class for arbitrary precision integers APInt is a functional replacement for common case unsigned integer type like unsigned
Definition: tmp.txt:1
llvm::AArch64ISD::SST1_SXTW_SCALED_PRED
@ SST1_SXTW_SCALED_PRED
Definition: AArch64ISelLowering.h:422
llvm::AArch64ISD::FRECPE
@ FRECPE
Definition: AArch64ISelLowering.h:313
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
llvm::SelectionDAG::addCallSiteInfo
void addCallSiteInfo(const SDNode *Node, CallSiteInfoImpl &&CallInfo)
Set CallSiteInfo to be associated with Node.
Definition: SelectionDAG.h:2247
type
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference and DH registers in an instruction requiring REX prefix divb and mulb both produce results in AH If isel emits a CopyFromReg which gets turned into a movb and that can be allocated a r8b r15b To get around isel emits a CopyFromReg from AX and then right shift it down by and truncate it It s not pretty but it works We need some register allocation magic to make the hack go which would often require a callee saved register Callees usually need to keep this value live for most of their body so it doesn t add a significant burden on them We currently implement this in however this is suboptimal because it means that it would be quite awkward to implement the optimization for callers A better implementation would be to relax the LLVM IR rules for sret arguments to allow a function with an sret argument to have a non void return type
Definition: README-X86-64.txt:70
llvm::AArch64ISD::INDEX_VECTOR
@ INDEX_VECTOR
Definition: AArch64ISelLowering.h:349
llvm::AArch64ISD::SQSHL_I
@ SQSHL_I
Definition: AArch64ISelLowering.h:215
llvm::AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU
@ ZERO_EXTEND_INREG_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:147
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:920
llvm::ISD::VECREDUCE_FMIN
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1280
llvm::SelectionDAG::SplitVector
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provides VTs and return the low/high part.
Definition: SelectionDAG.cpp:11666
TargetCallingConv.h
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:35
llvm::AArch64ISD::SDIV_PRED
@ SDIV_PRED
Definition: AArch64ISelLowering.h:112
getGatherVecOpcode
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend)
Definition: AArch64ISelLowering.cpp:5349
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:102
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::AArch64II::MO_DLLIMPORT
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: AArch64BaseInfo.h:758
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::AArch64ISD::FMINNM_PRED
@ FMINNM_PRED
Definition: AArch64ISelLowering.h:102
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:131
llvm::TargetLoweringBase::setHasExtractBitsInsn
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
Definition: TargetLowering.h:2339
llvm::MVT::v2f16
@ v2f16
Definition: MachineValueType.h:147
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:47
llvm::AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
Definition: AArch64ISelLowering.cpp:22978
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:470
RuntimeLibcalls.h
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:820
llvm::AArch64::rmMask
@ rmMask
Definition: AArch64ISelLowering.h:501
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1635
llvm::AArch64Subtarget::useSVEForFixedLengthVectors
bool useSVEForFixedLengthVectors() const
Definition: AArch64Subtarget.h:381
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:237
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:942
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1359
Scaled
@ Scaled
Definition: ARCInstrInfo.cpp:35
llvm::AArch64::SVEBitsPerBlock
static constexpr unsigned SVEBitsPerBlock
Definition: AArch64BaseInfo.h:835
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
llvm::CallingConv::CXX_FAST_TLS
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1755
isExtendOrShiftOperand
static bool isExtendOrShiftOperand(SDValue N)
Definition: AArch64ISelLowering.cpp:17628
llvm::AArch64::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
Definition: AArch64FastISel.cpp:5144
llvm::ISD::VECREDUCE_ADD
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1284
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:965
llvm::ISD::VECREDUCE_SMAX
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1289
llvm::AArch64ISD::LD1RQ_MERGE_ZERO
@ LD1RQ_MERGE_ZERO
Definition: AArch64ISelLowering.h:364
AArch64ExpandImm.h
llvm::MachineFrameInfo::hasStackProtectorIndex
bool hasStackProtectorIndex() const
Definition: MachineFrameInfo.h:360
isConcatMask
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
Definition: AArch64ISelLowering.cpp:10981
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1660
llvm::AArch64Subtarget::getChkStkName
const char * getChkStkName() const
Definition: AArch64Subtarget.h:393
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4813
isCMN
static bool isCMN(SDValue Op, ISD::CondCode CC)
Definition: AArch64ISelLowering.cpp:3053
llvm::APInt::sdivrem
static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition: APInt.cpp:1888
llvm::ISD::AssertZext
@ AssertZext
Definition: ISDOpcodes.h:62
llvm::AArch64ISD::SST1_IMM_PRED
@ SST1_IMM_PRED
Definition: AArch64ISelLowering.h:423
llvm::LegalityPredicates::all
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
Definition: LegalizerInfo.h:228
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:198
llvm::AArch64ISD::DUPLANE128
@ DUPLANE128
Definition: AArch64ISelLowering.h:177
llvm::APInt::logBase2
unsigned logBase2() const
Definition: APInt.h:1700
llvm::ForwardedRegister
Describes a register that needs to be forwarded from the prologue to a musttail call.
Definition: CallingConvLower.h:144
Generic
@ Generic
Definition: AArch64MCAsmInfo.cpp:23
llvm::ISD::STRICT_LRINT
@ STRICT_LRINT
Definition: ISDOpcodes.h:432
llvm::AArch64TargetLowering::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist instruction in then/else to if.
Definition: AArch64ISelLowering.cpp:13660
llvm::AArch64TargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ISD::SETCC ValueType.
Definition: AArch64ISelLowering.cpp:1976
llvm::SelectionDAG::getMaskedGather
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
Definition: SelectionDAG.cpp:9004
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1132
llvm::AArch64II::MO_NC
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
Definition: AArch64BaseInfo.h:747
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:39
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
IsSVECntIntrinsic
static std::optional< unsigned > IsSVECntIntrinsic(SDValue S)
Definition: AArch64ISelLowering.cpp:15475
llvm::AArch64ISD::VASHR
@ VASHR
Definition: AArch64ISelLowering.h:212
llvm::ISD::isConstantSplatVector
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
Definition: SelectionDAG.cpp:141
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
isConstant
static bool isConstant(const MachineInstr &MI)
Definition: AMDGPUInstructionSelector.cpp:2605
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:31
llvm::AArch64TargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
Definition: AArch64ISelLowering.cpp:5834
llvm::AArch64II::MO_PAGEOFF
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
Definition: AArch64BaseInfo.h:711
llvm::ISD::SPONENTRY
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:106
llvm::MVT::v1f32
@ v1f32
Definition: MachineValueType.h:167
carryFlagToValue
static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG, bool Invert)
Definition: AArch64ISelLowering.cpp:3773
getPromotedVTForPredicate
static EVT getPromotedVTForPredicate(EVT VT)
Definition: AArch64ISelLowering.cpp:195
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:795
llvm::ISD::SSUBO_CARRY
@ SSUBO_CARRY
Definition: ISDOpcodes.h:314
llvm::AArch64ISD::EXT
@ EXT
Definition: AArch64ISelLowering.h:206
llvm::TargetLowering::CallLoweringInfo::CallConv
CallingConv::ID CallConv
Definition: TargetLowering.h:4220
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::ISD::OutputArg
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
Definition: TargetCallingConv.h:233
llvm::DemandedBits
Definition: DemandedBits.h:40
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::ISD::VECREDUCE_FADD
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1276
llvm::AArch64::SMEMatrixArray
@ SMEMatrixArray
Definition: AArch64InstrInfo.h:593
llvm::TargetLoweringBase::setStackPointerRegisterToSaveRestore
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
Definition: TargetLowering.h:2322
llvm::MaskedGatherScatterSDNode::isIndexScaled
bool isIndexScaled() const
Definition: SelectionDAGNodes.h:2823
llvm::AArch64TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
Definition: AArch64ISelLowering.cpp:6233
llvm::AArch64ISD::CSINV
@ CSINV
Definition: AArch64ISelLowering.h:83
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:921
llvm::AArch64FunctionInfo::getVarArgsGPRIndex
int getVarArgsGPRIndex() const
Definition: AArch64MachineFunctionInfo.h:345
llvm::TargetLowering::CW_Register
@ CW_Register
Definition: TargetLowering.h:4640
llvm::EVT::getHalfSizedIntegerVT
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:397
llvm::AArch64FunctionInfo
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Definition: AArch64MachineFunctionInfo.h:39
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:932
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::AArch64TargetLowering::shouldExpandAtomicStoreInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
Definition: AArch64ISelLowering.cpp:22684
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:9202
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize
bool isBeforeLegalize() const
Definition: TargetLowering.h:3944
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
llvm::IRBuilderBase::getInt8PtrTy
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:560
llvm::AArch64FunctionInfo::setVarArgsStackOffset
void setVarArgsStackOffset(unsigned Offset)
Definition: AArch64MachineFunctionInfo.h:343
Analysis.h
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition: SelectionDAG.cpp:1496
llvm::TargetLoweringBase::setPrefFunctionAlignment
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
Definition: TargetLowering.h:2525
llvm::MachineFrameInfo::computeMaxCallFrameSize
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
Definition: MachineFrameInfo.cpp:187
llvm::AArch64ISD::FADDV_PRED
@ FADDV_PRED
Definition: AArch64ISelLowering.h:331
llvm::AArch64ISD::CMGT
@ CMGT
Definition: AArch64ISelLowering.h:228
llvm::AtomicRMWInst::Min
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:748
llvm::DenormalMode
Represent subnormal handling kind for floating point instruction inputs and outputs.
Definition: FloatingPointMode.h:69
llvm::SystemZISD::XC
@ XC
Definition: SystemZISelLowering.h:125
llvm::AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
Definition: AArch64ISelLowering.cpp:22839
performMulVectorCmpZeroCombine
static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15624
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
llvm::AArch64_AM::isAdvSIMDModImmType8
static bool isAdvSIMDModImmType8(uint64_t Imm)
Definition: AArch64AddressingModes.h:559
llvm::AArch64TargetLowering::shouldInsertFencesForAtomic
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Definition: AArch64ISelLowering.cpp:22645
llvm::ISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1172
llvm::AArch64ISD::FADDA_PRED
@ FADDA_PRED
Definition: AArch64ISelLowering.h:330
MCRegisterInfo.h
AArch64PerfectShuffle.h
llvm::ISD::UADDSAT
@ UADDSAT
Definition: ISDOpcodes.h:341
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:1038
llvm::TargetStackID::ScalableVector
@ ScalableVector
Definition: TargetFrameLowering.h:30
llvm::ISD::ATOMIC_LOAD_ADD
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1187
llvm::AArch64Subtarget::hasSVEorSME
bool hasSVEorSME() const
Definition: AArch64Subtarget.h:364
llvm::AArch64TargetLowering::isVectorClearMaskLegal
bool isVectorClearMaskLegal(ArrayRef< int > M, EVT VT) const override
Similar to isShuffleMaskLegal.
Definition: AArch64ISelLowering.cpp:12928
llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:492
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2363
llvm::AArch64ISD::BICi
@ BICi
Definition: AArch64ISelLowering.h:189
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1869
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:742
llvm::SDNode::dump
void dump() const
Dump this node, for debugging.
Definition: SelectionDAGDumper.cpp:554
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:446
llvm::AArch64II::MO_NO_FLAG
@ MO_NO_FLAG
Definition: AArch64BaseInfo.h:699
llvm::SelectionDAG::getSExtOrTrunc
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Definition: SelectionDAG.cpp:1440
llvm::MemSDNode::getOriginalAlign
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
Definition: SelectionDAGNodes.h:1292
llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:8970
ArrayRef.h
llvm::SDValue::getScalarValueSizeInBits
uint64_t getScalarValueSizeInBits() const
Definition: SelectionDAGNodes.h:203
llvm::SelectionDAG::getAnyExtOrTrunc
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
Definition: SelectionDAG.cpp:1434
llvm::AArch64ISD::LD1RO_MERGE_ZERO
@ LD1RO_MERGE_ZERO
Definition: AArch64ISelLowering.h:365
llvm::EVT::getVectorMinNumElements
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:331
llvm::AArch64ISD::PMULL
@ PMULL
Definition: AArch64ISelLowering.h:310
llvm::AArch64ISD::SMULL
@ SMULL
Definition: AArch64ISelLowering.h:307
OP_VTRNR
@ OP_VTRNR
Definition: ARMISelLowering.cpp:8325
GenericSetCCInfo
Helper structure to keep track of ISD::SET_CC operands.
Definition: AArch64ISelLowering.cpp:17099
isPow2Splat
static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated)
Definition: AArch64ISelLowering.cpp:12818
getDUPLANEOp
static unsigned getDUPLANEOp(EVT EltType)
Definition: AArch64ISelLowering.cpp:11275
llvm::SelectionDAG::getAllOnesConstant
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:656
llvm::AArch64TargetLowering::isShuffleMaskLegal
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
Definition: AArch64ISelLowering.cpp:12898
llvm::CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2
Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
Definition: CallingConv.h:238
llvm::GlobalAddressSDNode::getOffset
int64_t getOffset() const
Definition: SelectionDAGNodes.h:1776
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:170
llvm::shuffle
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1577
llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
llvm::pdb::PDB_MemoryType::Stack
@ Stack
llvm::TargetMachine::getTLSModel
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
Definition: TargetMachine.cpp:154
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:130
Enable
@ Enable
Definition: DwarfDebug.cpp:86
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:911
llvm::ISD::MSTORE
@ MSTORE
Definition: ISDOpcodes.h:1212
llvm::ISD::STRICT_LROUND
@ STRICT_LROUND
Definition: ISDOpcodes.h:430
llvm::AArch64ISD::ABDS_PRED
@ ABDS_PRED
Definition: AArch64ISelLowering.h:94
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2376
llvm::MachineFrameInfo::setHasTailCall
void setHasTailCall(bool V=true)
Definition: MachineFrameInfo.h:639
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1414
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:179
llvm::AArch64ISD::RHADDU_PRED
@ RHADDU_PRED
Definition: AArch64ISelLowering.h:111
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:119
llvm::AArch64ISD::FCMGE
@ FCMGE
Definition: AArch64ISelLowering.h:232
llvm::ISD::SETOGT
@ SETOGT
Definition: ISDOpcodes.h:1438
llvm::AArch64TargetLowering::getOptimalMemOpLLT
LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &FuncAttributes) const override
LLT returning variant.
Definition: AArch64ISelLowering.cpp:14776
combineSVEReductionFP
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18083
llvm::TargetLowering::CallLoweringInfo
This structure contains all information that is necessary for lowering calls.
Definition: TargetLowering.h:4198
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SelectionDAG::getVectorShuffle
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition: SelectionDAG.cpp:1964
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
tryCombineFixedPointConvert
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:16983
llvm::SelectionDAG::getMaskedLoad
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
Definition: SelectionDAG.cpp:8909
performInsertSubvectorCombine
static SDValue performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:16942
llvm::AArch64_AM::encodeAdvSIMDModImmType2
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
Definition: AArch64AddressingModes.h:472
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
llvm::PredicateConstraint
Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op is the value the constraint a...
Definition: PredicateInfo.h:75
llvm::AArch64ISD::SQSHLU_I
@ SQSHLU_I
Definition: AArch64ISelLowering.h:217
llvm::SelectionDAG::getAtomic
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
Definition: SelectionDAG.cpp:7839
llvm::APInt::sge
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition: APInt.h:1215
llvm::ISD::MULHS
@ MULHS
Definition: ISDOpcodes.h:638
llvm::MachineFrameInfo::CreateFixedObject
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition: MachineFrameInfo.cpp:83
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:52
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
getTestBitOperand
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20571
performSubAddMULCombine
static SDValue performSubAddMULCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17723
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:925
llvm::AArch64ISD::EXTR
@ EXTR
Definition: AArch64ISelLowering.h:169
llvm::TargetLoweringBase::EnableExtLdPromotion
bool EnableExtLdPromotion
Definition: TargetLowering.h:3495
isVShiftRImm
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
Definition: AArch64ISelLowering.cpp:12968
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:688
llvm::AArch64TargetLowering::insertCopiesSplitCSR
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
Definition: AArch64ISelLowering.cpp:23020
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1448
llvm::ISD::DEBUGTRAP
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1135
llvm::AtomicCmpXchgInst::getCompareOperand
Value * getCompareOperand()
Definition: Instructions.h:647
changeVectorFPCCToAArch64CC
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
Definition: AArch64ISelLowering.cpp:3004
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1601
llvm::AtomicRMWInst::isFloatingPointOperation
bool isFloatingPointOperation() const
Definition: Instructions.h:889
llvm::AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO
@ GLDFF1_SXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:396
llvm::TargetLowering::expandShiftParts
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Definition: TargetLowering.cpp:7656
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:265
llvm::MVT::all_valuetypes
static auto all_valuetypes()
SimpleValueType Iteration.
Definition: MachineValueType.h:1520
llvm::ISD::STACKMAP
@ STACKMAP
Definition: ISDOpcodes.h:1297
llvm::MaskedLoadSDNode
This class is used to represent an MLOAD node.
Definition: SelectionDAGNodes.h:2658
LCALLNAME5
#define LCALLNAME5(A, B)
llvm::MVT::nxv4i32
@ nxv4i32
Definition: MachineValueType.h:231
llvm::TargetOptions::TLSSize
unsigned TLSSize
Bit size of immediate TLS offsets (0 == use the default).
Definition: TargetOptions.h:287
llvm::TargetLoweringBase::AtomicExpansionKind::LLSC
@ LLSC
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:70
llvm::AArch64FunctionInfo::branchTargetEnforcement
bool branchTargetEnforcement() const
Definition: AArch64MachineFunctionInfo.h:435
llvm::AArch64ISD::LDFF1S_MERGE_ZERO
@ LDFF1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:363
llvm::AArch64ISD::SRSHR_I
@ SRSHR_I
Definition: AArch64ISelLowering.h:218
llvm::AArch64ISD::SMIN_PRED
@ SMIN_PRED
Definition: AArch64ISelLowering.h:115
llvm::AArch64ISD::FNEG_MERGE_PASSTHRU
@ FNEG_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:133
llvm::codeview::CompileSym2Flags::EC
@ EC
llvm::SystemZISD::OC
@ OC
Definition: SystemZISelLowering.h:124
llvm::GlobalValue::isThreadLocal
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:259
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition: MachineMemOperand.h:140
llvm::AArch64ISD::GLD1_SXTW_MERGE_ZERO
@ GLD1_SXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:376
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:62
llvm::AArch64ISD::GLDFF1S_IMM_MERGE_ZERO
@ GLDFF1S_IMM_MERGE_ZERO
Definition: AArch64ISelLowering.h:406
canGuaranteeTCO
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
Definition: AArch64ISelLowering.cpp:6860
llvm::AArch64TargetLowering::isReassocProfitable
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
Control the following reassociation of operands: (op (op x, c1), y) -> (op (op x, y),...
Definition: AArch64ISelLowering.cpp:6216
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1434
llvm::MaskedStoreSDNode::getOffset
const SDValue & getOffset() const
Definition: SelectionDAGNodes.h:2712
llvm::AArch64TargetLowering::isOffsetFoldingLegal
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Definition: AArch64ISelLowering.cpp:9757
llvm::AArch64FunctionInfo::getVarArgsGPRSize
unsigned getVarArgsGPRSize() const
Definition: AArch64MachineFunctionInfo.h:348
llvm::AArch64Subtarget::isCallingConvWin64
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: AArch64Subtarget.h:325
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::AArch64TargetLowering::isFPImmLegal
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Definition: AArch64ISelLowering.cpp:9764
performFirstTrueTestVectorCombine
static SDValue performFirstTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:16587
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9946
llvm::AArch64FunctionInfo::getVarArgsStackOffset
unsigned getVarArgsStackOffset() const
Definition: AArch64MachineFunctionInfo.h:342
performNVCASTCombine
static SDValue performNVCASTCombine(SDNode *N)
Get rid of unnecessary NVCASTs (that don't change the type).
Definition: AArch64ISelLowering.cpp:20865
replaceSplatVectorStore
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of a scalar to a vector store by scalar stores of the scalar value.
Definition: AArch64ISelLowering.cpp:18845
tryAdvSIMDModImm32
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
Definition: AArch64ISelLowering.cpp:11792
LCALLNAME4
#define LCALLNAME4(A, B)
llvm::ISD::VECREDUCE_AND
@ VECREDUCE_AND
Definition: ISDOpcodes.h:1286
llvm::SelectionDAG::getBitcast
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition: SelectionDAG.cpp:2250
llvm::MVT::getFloatingPointVT
static MVT getFloatingPointVT(unsigned BitWidth)
Definition: MachineValueType.h:1229
llvm::AArch64CC::LAST_ACTIVE
@ LAST_ACTIVE
Definition: AArch64BaseInfo.h:277
isEquivalentMaskless
static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant)
Definition: AArch64ISelLowering.cpp:19961
foldTruncStoreOfExt
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N)
Definition: AArch64ISelLowering.cpp:19370
llvm::ISD::RETURNADDR
@ RETURNADDR
Definition: ISDOpcodes.h:95
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::AArch64ISD::FSQRT_MERGE_PASSTHRU
@ FSQRT_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:138
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
llvm::AArch64ISD::GLDFF1_SXTW_MERGE_ZERO
@ GLDFF1_SXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:394
llvm::MVT::nxv4bf16
@ nxv4bf16
Definition: MachineValueType.h:255
llvm::ISD::SUBCARRY
@ SUBCARRY
Definition: ISDOpcodes.h:304
getConstantLaneNumOfExtractHalfOperand
static std::optional< uint64_t > getConstantLaneNumOfExtractHalfOperand(SDValue &Op)
Definition: AArch64ISelLowering.cpp:4395
llvm::AArch64FunctionInfo::getVarArgsFPRSize
unsigned getVarArgsFPRSize() const
Definition: AArch64MachineFunctionInfo.h:354
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MatchRegisterName
static unsigned MatchRegisterName(StringRef Name)
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:9714
llvm::EVT::widenIntegerVectorElementType
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition: ValueTypes.h:411
llvm::CodeGenOpt::None
@ None
-O0
Definition: CodeGen.h:58
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2397
tryAdvSIMDModImm321s
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition: AArch64ISelLowering.cpp:11888
llvm::AArch64ISD::BSP
@ BSP
Definition: AArch64ISelLowering.h:194
llvm::KnownBits::ashr
static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for ashr(LHS, RHS).
Definition: KnownBits.cpp:274
llvm::AArch64ISD::ADCS
@ ADCS
Definition: AArch64ISelLowering.h:156
llvm::ISD::STRICT_LLRINT
@ STRICT_LLRINT
Definition: ISDOpcodes.h:433
llvm::AArch64_AM::encodeAdvSIMDModImmType12
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
Definition: AArch64AddressingModes.h:712
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:750
llvm::AArch64ISD::FIRST_NUMBER
@ FIRST_NUMBER
Definition: AArch64ISelLowering.h:51
llvm::ISD::VASTART
@ VASTART
Definition: ISDOpcodes.h:1086
isWideDUPMask
static bool isWideDUPMask(ArrayRef< int > M, EVT VT, unsigned BlockSize, unsigned &DupLaneOp)
Check if a vector shuffle corresponds to a DUP instructions with a larger element width than the vect...
Definition: AArch64ISelLowering.cpp:10706
performGatherLoadCombine
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
Definition: AArch64ISelLowering.cpp:21088
llvm::MachinePointerInfo::getWithOffset
MachinePointerInfo getWithOffset(int64_t O) const
Definition: MachineMemOperand.h:79
info
lazy value info
Definition: LazyValueInfo.cpp:58
llvm::AArch64ISD::LDNF1_MERGE_ZERO
@ LDNF1_MERGE_ZERO
Definition: AArch64ISelLowering.h:360
llvm::AArch64TargetLowering::mergeStoresAfterLegalization
bool mergeStoresAfterLegalization(EVT VT) const override
SVE code generation for fixed length vectors does not custom lower BUILD_VECTOR.
Definition: AArch64ISelLowering.cpp:6144
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::MVT::bf16
@ bf16
Definition: MachineValueType.h:55
InstructionCost.h
llvm::SelectionDAG::getGlobalAddress
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
Definition: SelectionDAG.cpp:1732
llvm::AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO
@ GLD1_UXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:377
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::TargetLowering::CW_Default
@ CW_Default
Definition: TargetLowering.h:4643
llvm::operator==
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Definition: AddressRanges.h:153
llvm::ZExtInst
This class represents zero extension of integer types.
Definition: Instructions.h:4852
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:258
MAKE_CASE
#define MAKE_CASE(V)
llvm::SelectionDAG::getCALLSEQ_END
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:1034
llvm::CCState::resultsCompatible
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
Definition: CallingConvLower.cpp:258
llvm::ISD::VSCALE
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1253
llvm::isAcquireOrStronger
bool isAcquireOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:128
performSunpkloCombine
static SDValue performSunpkloCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:19229
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:694
Triple.h
llvm::AArch64ISD::LD1LANEpost
@ LD1LANEpost
Definition: AArch64ISelLowering.h:464
llvm::AArch64TargetLowering::enableAggressiveFMAFusion
bool enableAggressiveFMAFusion(EVT VT) const override
Enable aggressive FMA fusion on targets that want it.
Definition: AArch64ISelLowering.cpp:23087
llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition: MachineValueType.h:1269
tryCombineShiftImm
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17812
llvm::AArch64Subtarget::classifyGlobalFunctionReference
unsigned classifyGlobalFunctionReference(const GlobalValue *GV, const TargetMachine &TM) const
Definition: AArch64Subtarget.cpp:401
llvm::AArch64ISD::CLASTB_N
@ CLASTB_N
Definition: AArch64ISelLowering.h:324
llvm::CC_AArch64_Win64_CFGuard_Check
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::MVT::nxv2i32
@ nxv2i32
Definition: MachineValueType.h:230
llvm::ISD::STRICT_FROUNDEVEN
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:428
LowerSVEIntrinsicEXT
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17936
llvm::ISD::STRICT_FEXP
@ STRICT_FEXP
Definition: ISDOpcodes.h:416
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1623
llvm::ISD::ConstantPool
@ ConstantPool
Definition: ISDOpcodes.h:82
llvm::AArch64ISD::ANDV_PRED
@ ANDV_PRED
Definition: AArch64ISelLowering.h:277
TargetOptions.h
llvm::AArch64ISD::REVH_MERGE_PASSTHRU
@ REVH_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:344
performLDNT1Combine
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18673
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition: ISDOpcodes.h:956
llvm::AArch64TargetLowering::isDesirableToCommuteXorWithShift
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
Definition: AArch64ISelLowering.cpp:15005
llvm::ISD::GlobalTLSAddress
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
llvm::ISD::UBSANTRAP
@ UBSANTRAP
UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
Definition: ISDOpcodes.h:1139
llvm::AArch64ISD::BITREVERSE_MERGE_PASSTHRU
@ BITREVERSE_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:342
llvm::AArch64_AM::encodeLogicalImmediate
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
Definition: AArch64AddressingModes.h:283
llvm::TargetLowering::CallLoweringInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:4223
llvm::BuildVectorSDNode::isConstant
bool isConstant() const
Definition: SelectionDAG.cpp:12030
llvm::AArch64ISD::LDNF1S_MERGE_ZERO
@ LDNF1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:361
isEXTMask
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
Definition: AArch64ISelLowering.cpp:10779
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::MemSDNode::isAtomic
bool isAtomic() const
Return true if the memory operation ordering is Unordered or higher.
Definition: SelectionDAGNodes.h:1344
llvm::AArch64CC::GE
@ GE
Definition: AArch64BaseInfo.h:265
llvm::AArch64ISD::STRICT_FCMPE
@ STRICT_FCMPE
Definition: AArch64ISelLowering.h:445
llvm::MVT::fixedlen_vector_valuetypes
static auto fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1542
performUADDVAddCombine
static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15307
performTBZCombine
static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20644
combineSVEReductionInt
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18064
llvm::AArch64Subtarget::hasCustomCallingConv
bool hasCustomCallingConv() const
Definition: AArch64Subtarget.h:222
llvm::getSVEPredPatternFromNumElements
std::optional< unsigned > getSVEPredPatternFromNumElements(unsigned MinNumElts)
Return specific VL predicate pattern based on the number of elements.
Definition: AArch64BaseInfo.h:506
llvm::ISD::STRICT_FCOS
@ STRICT_FCOS
Definition: ISDOpcodes.h:415
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:154
performUnpackCombine
static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:18979
llvm::ISD::isNormalLoad
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Definition: SelectionDAGNodes.h:3049
llvm::ISD::UMAX
@ UMAX
Definition: ISDOpcodes.h:663
llvm::AArch64ISD::LD4post
@ LD4post
Definition: AArch64ISelLowering.h:450
llvm::AArch64_AM::getFP64Imm
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
Definition: AArch64AddressingModes.h:422
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1383
llvm::AArch64Subtarget::isXRegisterReserved
bool isXRegisterReserved(size_t i) const
Definition: AArch64Subtarget.h:211
llvm::ConstantPoolSDNode
Definition: SelectionDAGNodes.h:1887
llvm::MachineFrameInfo::setStackID
void setStackID(int ObjectIdx, uint8_t ID)
Definition: MachineFrameInfo.h:736
llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:1196
llvm::BlockAddressSDNode::getBlockAddress
const BlockAddress * getBlockAddress() const
Definition: SelectionDAGNodes.h:2212
llvm::APInt::getAllOnesValue
static APInt getAllOnesValue(unsigned numBits)
NOTE: This is soft-deprecated. Please use getAllOnes() instead.
Definition: APInt.h:219
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::AArch64ISD::CMEQ
@ CMEQ
Definition: AArch64ISelLowering.h:226
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:429
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
llvm::BuildVectorSDNode::isConstantSplat
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
Definition: SelectionDAG.cpp:11740
llvm::AArch64TargetLowering::isOpSuitableForRCPC3
bool isOpSuitableForRCPC3(const Instruction *I) const
Definition: AArch64ISelLowering.cpp:22628
DataLayout.h
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:163
llvm::AArch64TargetLowering::fallBackToDAGISel
bool fallBackToDAGISel(const Instruction &Inst) const override
Definition: AArch64ISelLowering.cpp:23180
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:110
llvm::AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO
@ GLDFF1_UXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:395
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::TargetLowering::LowerToTLSEmulatedModel
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
Definition: TargetLowering.cpp:9440
llvm::TargetLowering::LowerAsmOperandForConstraint
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Definition: TargetLowering.cpp:5232
llvm::SelectionDAG::getTargetInsertSubreg
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
Definition: SelectionDAG.cpp:10074
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:352
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:1037
llvm::AArch64ISD::DUPLANE8
@ DUPLANE8
Definition: AArch64ISelLowering.h:173
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
isAddSubSExt
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:4471
llvm::BuildVectorSDNode
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Definition: SelectionDAGNodes.h:1992
MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm::AArch64Subtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition: AArch64Subtarget.h:190
isMergePassthruOpcode
static bool isMergePassthruOpcode(unsigned Opc)
Definition: AArch64ISelLowering.cpp:226
llvm::logicalview::LVAttributeKind::Zero
@ Zero
llvm::ISD::ABDU
@ ABDU
Definition: ISDOpcodes.h:656
llvm::Offset
@ Offset
Definition: DWP.cpp:406
llvm::isReleaseOrStronger
bool isReleaseOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:132
llvm::Sched::Hybrid
@ Hybrid
Definition: TargetLowering.h:102
isOrXorChain
static bool isOrXorChain(SDValue N, unsigned &Num, SmallVector< std::pair< SDValue, SDValue >, 16 > &WorkList)
Definition: AArch64ISelLowering.cpp:8835
llvm::ISD::STRICT_SINT_TO_FP
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:448
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
FPRArgRegs
static const MCPhysReg FPRArgRegs[]
Definition: AArch64ISelLowering.cpp:147
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
NormalizeBuildVector
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:12125
llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG
bool isAfterLegalizeDAG() const
Definition: TargetLowering.h:3946
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:933
emitComparison
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3079
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1551
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:10541
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:168
llvm::ISD::ATOMIC_LOAD_SUB
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1188
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2596
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
hasPairwiseAdd
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16)
Definition: AArch64ISelLowering.cpp:16551
llvm::AArch64TargetLowering::getScalarShiftAmountTy
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
Definition: AArch64ISelLowering.cpp:2254
llvm::ISD::STRICT_FSUB
@ STRICT_FSUB
Definition: ISDOpcodes.h:401
uint32_t
SetCCInfo::Generic
GenericSetCCInfo Generic
Definition: AArch64ISelLowering.cpp:17113
llvm::StackOffset
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:36
llvm::AArch64ISD::MOVI
@ MOVI
Definition: AArch64ISelLowering.h:180
llvm::AArch64ISD::ADDS
@ ADDS
Definition: AArch64ISelLowering.h:154
Compiler.h
llvm::AArch64FunctionInfo::setVarArgsGPRIndex
void setVarArgsGPRIndex(int Index)
Definition: AArch64MachineFunctionInfo.h:346
llvm::TargetLoweringBase::IsStrictFPEnabled
bool IsStrictFPEnabled
Definition: TargetLowering.h:3507
llvm::AArch64ISD::ADR
@ ADR
Definition: AArch64ISelLowering.h:76
llvm::AArch64FunctionInfo::getForwardedMustTailRegParms
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
Definition: AArch64MachineFunctionInfo.h:406
llvm::MVT::is128BitVector
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: MachineValueType.h:433
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
llvm::AArch64ISD::REV64
@ REV64
Definition: AArch64ISelLowering.h:205
llvm::TargetLoweringBase::MaxStoresPerMemmoveOptSize
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3488
llvm::MaskedGatherScatterSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2832
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1149
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AArch64Subtarget::getMaxBytesForLoopAlignment
unsigned getMaxBytesForLoopAlignment() const
Definition: AArch64Subtarget.h:249
llvm::VTSDNode
This class is used to represent EVT's, which are used to parameterize some operations.
Definition: SelectionDAGNodes.h:2297
llvm::AArch64ISD::CMGTz
@ CMGTz
Definition: AArch64ISelLowering.h:238
llvm::TargetLowering::DAGCombinerInfo::isCalledByLegalizer
bool isCalledByLegalizer() const
Definition: TargetLowering.h:3948
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ConstantSDNode::getSExtValue
int64_t getSExtValue() const
Definition: SelectionDAGNodes.h:1602
performGLD1Combine
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:19149
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:921
llvm::SDValue::hasOneUse
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Definition: SelectionDAGNodes.h:1185
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::AArch64ISD::SSTNT1_PRED
@ SSTNT1_PRED
Definition: AArch64ISelLowering.h:426
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::AArch64RegisterInfo
Definition: AArch64RegisterInfo.h:26
llvm::ISD::BUILTIN_OP_END
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1311
llvm::AArch64ISD::LD1DUPpost
@ LD1DUPpost
Definition: AArch64ISelLowering.h:460
llvm::TargetLowering::getRegForInlineAsmConstraint
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Definition: TargetLowering.cpp:5315
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1356
SetCCInfoAndKind
Helper structure to be able to read SetCC information.
Definition: AArch64ISelLowering.cpp:17120
llvm::AArch64ISD::FMA_PRED
@ FMA_PRED
Definition: AArch64ISelLowering.h:98
llvm::AArch64ISD::LS64_EXTRACT
@ LS64_EXTRACT
Definition: AArch64ISelLowering.h:356
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2412
performDUPCombine
static SDValue performDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: AArch64ISelLowering.cpp:20846
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition: SelectionDAG.cpp:582
llvm::AArch64TargetLowering::getTargetNodeName
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Definition: AArch64ISelLowering.cpp:2315
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:190
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:112
llvm::SDNode::ops
ArrayRef< SDUse > ops() const
Definition: SelectionDAGNodes.h:930
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:922
llvm::ISD::STRICT_FP_EXTEND
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:469
llvm::CCState::CheckReturn
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
Definition: CallingConvLower.cpp:96
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:136
llvm::MCRegisterInfo
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Definition: MCRegisterInfo.h:135
performSetCCPunpkCombine
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20486
llvm::AArch64ISD::SVE_LD2_MERGE_ZERO
@ SVE_LD2_MERGE_ZERO
Definition: AArch64ISelLowering.h:368
llvm::AArch64ISD::ABDU_PRED
@ ABDU_PRED
Definition: AArch64ISelLowering.h:95
llvm::AArch64ISD::CBNZ
@ CBNZ
Definition: AArch64ISelLowering.h:284
performST1Combine
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18720
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
llvm::CCValAssign::FPExt
@ FPExt
Definition: CallingConvLower.h:49
llvm::MVT::is64BitVector
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: MachineValueType.h:424
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:177
llvm::ISD::VECREDUCE_XOR
@ VECREDUCE_XOR
Definition: ISDOpcodes.h:1288
llvm::CodeModel::Tiny
@ Tiny
Definition: CodeGen.h:31
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
LoadOps
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
Definition: AggressiveInstCombine.cpp:611
llvm::SelectionDAG::getTargetConstantPool
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:733
llvm::AArch64CC::EQ
@ EQ
Definition: AArch64BaseInfo.h:255
llvm::AArch64TargetLowering::emitStoreConditional
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
Definition: AArch64ISelLowering.cpp:22845
llvm::MachineMemOperand::MOVolatile
@ MOVolatile
The memory access is volatile.
Definition: MachineMemOperand.h:138
llvm::ISD::ATOMIC_SWAP
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1186
llvm::AArch64ISD::ORV_PRED
@ ORV_PRED
Definition: AArch64ISelLowering.h:275
llvm::ISD::VECTOR_SPLICE
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:598
llvm::MaskedGatherScatterSDNode::getIndexType
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
Definition: SelectionDAGNodes.h:2820
AArch64ISelLowering.h
llvm::TargetLoweringBase::ShiftLegalizationStrategy
ShiftLegalizationStrategy
Return the preferred strategy to legalize tihs SHIFT instruction, with ExpansionFactor being the recu...
Definition: TargetLowering.h:926
llvm::AArch64TargetLowering::shouldFoldConstantShiftPairToMask
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
Definition: AArch64ISelLowering.cpp:15029
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1414
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:824
tryToConvertShuffleOfTbl2ToTbl4
static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:11428
llvm::AArch64::SMEMatrixTileB
@ SMEMatrixTileB
Definition: AArch64InstrInfo.h:588
llvm::IRBuilderBase::CreateConstGEP1_32
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1796
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1714
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1502
llvm::ComplexDeinterleavingOperation::Shuffle
@ Shuffle
llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:718
llvm::MachinePointerInfo::getAddrSpace
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
Definition: MachineOperand.cpp:1024
OP_VEXT3
@ OP_VEXT3
Definition: ARMISelLowering.cpp:8319
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::AArch64ISD::UZP2
@ UZP2
Definition: AArch64ISelLowering.h:200
llvm::TargetLowering::C_RegisterClass
@ C_RegisterClass
Definition: TargetLowering.h:4622
llvm::isNullOrNullSplat
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1224
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::ISD::XOR
@ XOR
Definition: ISDOpcodes.h:668
llvm::TargetLoweringBase::ArgListTy
std::vector< ArgListEntry > ArgListTy
Definition: TargetLowering.h:323
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:408
llvm::SMEAttrs::SM_Compatible
@ SM_Compatible
Definition: AArch64SMEAttributes.h:32
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2367
llvm::AArch64TargetLowering::createComplexDeinterleavingIR
Value * createComplexDeinterleavingIR(Instruction *I, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
Definition: AArch64ISelLowering.cpp:24353
llvm::SelectionDAG::getTargetJumpTable
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:727
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1762
combineAcrossLanesIntrinsic
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17896
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:141
llvm::AArch64TargetLowering::shouldConvertConstantLoadToIntImm
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
Definition: AArch64ISelLowering.cpp:15051
MVT_CC
static const MVT MVT_CC
Value type used for condition codes.
Definition: AArch64ISelLowering.cpp:142
llvm::ISD::FRAMEADDR
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
llvm::AArch64ISD::PTRUE
@ PTRUE
Definition: AArch64ISelLowering.h:340
performVecReduceAddCombine
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST)
Definition: AArch64ISelLowering.cpp:15204
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:121
llvm::APInt::uadd_ov
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1933
llvm::RetCC_AArch64_WebKit_JS
bool RetCC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:187
llvm::AtomicOrdering::Release
@ Release
llvm::AtomicSDNode
This is an SDNode representing atomic operations.
Definition: SelectionDAGNodes.h:1444
ObjCARCUtil.h
llvm::MVT::v1i32
@ v1i32
Definition: MachineValueType.h:109
llvm::ISD::isIntEqualitySetCC
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1479
llvm::AArch64ISD::ANDS
@ ANDS
Definition: AArch64ISelLowering.h:158
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:912
performOrXorChainCombine
static SDValue performOrXorChainCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:8862
llvm::AArch64ISD::FMAXV_PRED
@ FMAXV_PRED
Definition: AArch64ISelLowering.h:332
llvm::AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU
@ FROUNDEVEN_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:137
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
llvm::ISD::INSERT_SUBVECTOR
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:558
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::AArch64_AM::encodeAdvSIMDModImmType5
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
Definition: AArch64AddressingModes.h:518
llvm::ISD::READ_REGISTER
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:118
llvm::AArch64Subtarget::getRegisterInfo
const AArch64RegisterInfo * getRegisterInfo() const override
Definition: AArch64Subtarget.h:182
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:358
foldIndexIntoBase
static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale, SDLoc DL, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:19561
Attributes.h
llvm::TargetLoweringBase::getSSPStackGuardCheck
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Definition: TargetLoweringBase.cpp:1991
llvm::MachineRegisterInfo::hasAtMostUserInstrs
bool hasAtMostUserInstrs(Register Reg, unsigned MaxUsers) const
hasAtMostUses - Return true if the given register has at most MaxUsers non-debug user instructions.
Definition: MachineRegisterInfo.cpp:425
llvm::AArch64TargetLowering::changeStreamingMode
SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, SDValue Chain, SDValue InFlag, SDValue PStateSM, bool Entry) const
If a change in streaming mode is required on entry to/return from a function call it emits and return...
Definition: AArch64ISelLowering.cpp:7131
llvm::SDNode::isOnlyUserOf
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
Definition: SelectionDAG.cpp:11228
convertMergedOpToPredOp
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc, SelectionDAG &DAG, bool UnpredOp=false, bool SwapOperands=false)
Definition: AArch64ISelLowering.cpp:18126
j
return j(j<< 16)
llvm::EVT::getHalfNumVectorElementsVT
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:420
getExtensionTo64Bits
static EVT getExtensionTo64Bits(const EVT &OrigVT)
Definition: AArch64ISelLowering.cpp:4358
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1457
llvm::MVT::nxv16i1
@ nxv16i1
Definition: MachineValueType.h:210
GenerateTBL
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:11201
llvm::MaskedGatherScatterSDNode::getScale
const SDValue & getScale() const
Definition: SelectionDAGNodes.h:2835
llvm::MVT::v8bf16
@ v8bf16
Definition: MachineValueType.h:161
llvm::AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO
@ GLD1S_SXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:387
llvm::MVT::nxv2i16
@ nxv2i16
Definition: MachineValueType.h:223
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:10927
performExtractVectorEltCombine
static SDValue performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:16651
isAllInactivePredicate
static bool isAllInactivePredicate(SDValue N)
Definition: AArch64ISelLowering.cpp:16343
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:124
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2597
llvm::CCState::getFirstUnallocated
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
Definition: CallingConvLower.h:313
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:121
llvm::AArch64ISD::FMAXNM_PRED
@ FMAXNM_PRED
Definition: AArch64ISelLowering.h:100
llvm::TargetLoweringBase::IntrinsicInfo
Definition: TargetLowering.h:1051
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::ISD::STRICT_FMUL
@ STRICT_FMUL
Definition: ISDOpcodes.h:402
llvm::AArch64TargetLowering::isLegalAddImmediate
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition: AArch64ISelLowering.cpp:14807
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:405
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:943
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:242
llvm::MachineFrameInfo::getObjectSSPLayout
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
Definition: MachineFrameInfo.h:568
llvm::MaskedStoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2711
emitStrictFPComparison
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, bool IsSignaling)
Definition: AArch64ISelLowering.cpp:3058
llvm::AArch64ISD::THREAD_POINTER
@ THREAD_POINTER
Definition: AArch64ISelLowering.h:89
Insn
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Definition: AArch64MIPeepholeOpt.cpp:129
llvm::AArch64Subtarget::getMaxSVEVectorSizeInBits
unsigned getMaxSVEVectorSizeInBits() const
Definition: AArch64Subtarget.h:369
llvm::AArch64ISD::MSRR
@ MSRR
Definition: AArch64ISelLowering.h:441
llvm::AArch64_AM::encodeAdvSIMDModImmType10
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
Definition: AArch64AddressingModes.h:614
std
Definition: BitVector.h:851
performExtendCombine
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18550
llvm::AArch64CC::VS
@ VS
Definition: AArch64BaseInfo.h:261
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2958
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2390
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1763
llvm::KnownBits
Definition: KnownBits.h:23
performAddSubCombine
static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17751
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:638
llvm::AArch64ISD::FRSQRTS
@ FRSQRTS
Definition: AArch64ISelLowering.h:316
isAllActivePredicate
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
Definition: AArch64ISelLowering.cpp:16351
llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:572
llvm::AArch64Subtarget::ClassifyGlobalReference
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
Definition: AArch64Subtarget.cpp:359
mayTailCallThisCC
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
Definition: AArch64ISelLowering.cpp:6866
llvm::ISD::FP_TO_SINT_SAT
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:838
llvm::AArch64TargetLowering::getNumInterleavedAccesses
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Definition: AArch64ISelLowering.cpp:14312
llvm::TargetLoweringBase::AtomicExpansionKind
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Definition: TargetLowering.h:250
llvm::ISD::SADDO_CARRY
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:313
uint16_t
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2599
CallingConvLower.h
llvm::AArch64TargetLowering::isZExtFree
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: AArch64ISelLowering.cpp:13686
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:668
llvm::Type::getHalfTy
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:226
performAddSubLongCombine
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17422
llvm::AArch64_AM::encodeAdvSIMDModImmType4
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
Definition: AArch64AddressingModes.h:502
llvm::MaskedLoadStoreSDNode::getAddressingMode
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
Definition: SelectionDAGNodes.h:2641
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:10917
convertFromScalableVector
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
Definition: AArch64ISelLowering.cpp:23302
llvm::codeview::InlineeLinesSignature::Normal
@ Normal
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:295
llvm::AArch64ISD::CSNEG
@ CSNEG
Definition: AArch64ISelLowering.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:357
llvm::AArch64ISD::STZG
@ STZG
Definition: AArch64ISelLowering.h:473
llvm::AArch64ISD::UDOT
@ UDOT
Definition: AArch64ISelLowering.h:259
llvm::ISD::BR
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:981
llvm::AArch64ISD::CALL_BTI
@ CALL_BTI
Definition: AArch64ISelLowering.h:59
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1366
llvm::AArch64_IMM::expandMOVImm
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
Definition: AArch64ExpandImm.cpp:426
llvm::BuildVectorSDNode::getConstantSplatNode
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
Definition: SelectionDAG.cpp:11899
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::AArch64TargetLowering::getVaListSizeInBits
unsigned getVaListSizeInBits(const DataLayout &DL) const override
Returns the size of the platform's va_list object.
Definition: AArch64ISelLowering.cpp:23092
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:915
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:550
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:923
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1349
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition: SelectionDAG.cpp:559
llvm::AArch64ISD::FMOV
@ FMOV
Definition: AArch64ISelLowering.h:184
llvm::TargetLowering::ConstraintWeight
ConstraintWeight
Definition: TargetLowering.h:4630
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:914
llvm::CallingConv::SwiftTail
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:468
performScatterStoreCombine
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
Definition: AArch64ISelLowering.cpp:20984
llvm::ISD::OutputArg::VT
MVT VT
Definition: TargetCallingConv.h:235
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:101
ISDOpcodes.h
llvm::AArch64ISD::UADDV_PRED
@ UADDV_PRED
Definition: AArch64ISelLowering.h:270
llvm::IRBuilderBase::GetInsertBlock
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition: Instructions.h:738
Enabled
static bool Enabled
Definition: Statistic.cpp:46
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:934
llvm::AArch64TargetLowering::getPreferredVectorAction
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Definition: AArch64ISelLowering.cpp:22579
llvm::APInt::isNegatedPowerOf2
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:441
llvm::TypeSize
Definition: TypeSize.h:314
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
llvm::AArch64TargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: AArch64ISelLowering.cpp:2770
llvm::AArch64ISD::NEG_MERGE_PASSTHRU
@ NEG_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:149
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
Casting.h
LowerSVEIntrinsicDUP
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17922
llvm::pdb::PDB_LocType::Slot
@ Slot
llvm::AArch64ISD::CSINC
@ CSINC
Definition: AArch64ISelLowering.h:85
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition: ISDOpcodes.h:425
foldCSELofCTTZ
static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20225
Function.h
isValidImmForSVEVecImmAddrMode
static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes, unsigned ScalarSizeInBytes)
Check if the value of OffsetInBytes can be used as an immediate for the gather load/prefetch and scat...
Definition: AArch64ISelLowering.cpp:20957
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:201
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:10064
llvm::AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO
@ GLDFF1S_SXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:405
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
isAddSubZExt
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:4482
llvm::AArch64ISD::TBL
@ TBL
Definition: AArch64ISelLowering.h:327
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::AArch64_AM::isAdvSIMDModImmType10
static bool isAdvSIMDModImmType10(uint64_t Imm)
Definition: AArch64AddressingModes.h:594
llvm::SelectionDAG::getTargetExternalSymbol
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.cpp:1915
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::CCState::getNextStackOffset
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
Definition: CallingConvLower.h:241
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::AArch64TargetLowering::initializeSplitCSR
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
Definition: AArch64ISelLowering.cpp:23014
llvm::TargetLoweringBase::LegalizeTypeAction
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
Definition: TargetLowering.h:206
llvm::ISD::VECREDUCE_OR
@ VECREDUCE_OR
Definition: ISDOpcodes.h:1287
skipExtensionForVectorMULL
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:4434
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1444
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:776
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:234
llvm::AArch64ISD::GLD1S_SXTW_MERGE_ZERO
@ GLD1S_SXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:385
llvm::AArch64_AM::encodeAdvSIMDModImmType8
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
Definition: AArch64AddressingModes.h:569
addRequiredExtensionForVectorMULL
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
Definition: AArch64ISelLowering.cpp:4375
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
OP_VDUP1
@ OP_VDUP1
Definition: ARMISelLowering.cpp:8314
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:145
llvm::AArch64FunctionInfo::setJumpTableEntryInfo
void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym)
Definition: AArch64MachineFunctionInfo.h:366
llvm::AArch64TargetLowering::EmitF128CSEL
MachineBasicBlock * EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const
Definition: AArch64ISelLowering.cpp:2630
getAtomicLoad128Opcode
static unsigned getAtomicLoad128Opcode(unsigned ISDOpcode, AtomicOrdering Ordering)
Definition: AArch64ISelLowering.cpp:22242
GeneratePerfectShuffle
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Definition: AArch64ISelLowering.cpp:11034
llvm::ISD::TargetConstant
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
llvm::MaskedGatherSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Definition: SelectionDAGNodes.h:2859
llvm::AArch64ISD::SVE_LD3_MERGE_ZERO
@ SVE_LD3_MERGE_ZERO
Definition: AArch64ISelLowering.h:369
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:174
llvm::AArch64ISD::FP_EXTEND_MERGE_PASSTHRU
@ FP_EXTEND_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:141
llvm::AArch64ISD::GLDFF1S_MERGE_ZERO
@ GLDFF1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:400
llvm::TLSModel::Model
Model
Definition: CodeGen.h:45
llvm::TargetLoweringBase::ZeroOrOneBooleanContent
@ ZeroOrOneBooleanContent
Definition: TargetLowering.h:233
llvm::AArch64TargetLowering::getSafeStackPointerLocation
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
If the target has a standard location for the unsafe stack pointer, returns the address of that locat...
Definition: AArch64ISelLowering.cpp:22963
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::SDNodeFlags
These are IR-level optimization flags that may be propagated to SDNodes.
Definition: SelectionDAGNodes.h:379
getSVEPredicateBitCast
static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:4747
llvm::AArch64CC::getInvertedCondCode
static CondCode getInvertedCondCode(CondCode Code)
Definition: AArch64BaseInfo.h:303
llvm::TargetLowering::CW_Invalid
@ CW_Invalid
Definition: TargetLowering.h:4632
llvm::AArch64FunctionInfo::getLazySaveTPIDR2Obj
unsigned getLazySaveTPIDR2Obj() const
Definition: AArch64MachineFunctionInfo.h:206
llvm::AArch64TargetLowering::EmitZAInstr
MachineBasicBlock * EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB, bool HasTile) const
Definition: AArch64ISelLowering.cpp:2732
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:186
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:177
llvm::AArch64CC::GT
@ GT
Definition: AArch64BaseInfo.h:267
llvm::Type::getBFloatTy
static Type * getBFloatTy(LLVMContext &C)
Definition: Type.cpp:227
llvm::TargetLoweringBase::setLibcallName
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
Definition: TargetLowering.h:3168
OP_VUZPR
@ OP_VUZPR
Definition: ARMISelLowering.cpp:8321
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:966
llvm::ISD::VACOPY
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1081
llvm::AArch64ISD::ADRP
@ ADRP
Definition: AArch64ISelLowering.h:75
llvm::MVT::nxv8i8
@ nxv8i8
Definition: MachineValueType.h:217
llvm::APInt::getSignedMinValue
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:116
llvm::Function::arg_begin
arg_iterator arg_begin()
Definition: Function.h:766
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:751
llvm::AArch64ISD::FCMEQz
@ FCMEQz
Definition: AArch64ISelLowering.h:241
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::ISD::SSUBO
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1177
llvm::codeview::ModifierOptions::Const
@ Const
llvm::AArch64_AM::encodeAdvSIMDModImmType11
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
Definition: AArch64AddressingModes.h:663
llvm::APInt::sext
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:946
performMSTORECombine
static SDValue performMSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:19516
LowerXALUO
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3822
llvm::AArch64Subtarget::isTargetFuchsia
bool isTargetFuchsia() const
Definition: AArch64Subtarget.h:266
tryAdvSIMDModImmFP
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition: AArch64ISelLowering.cpp:11940
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
llvm::MVT::nxv8f16
@ nxv8f16
Definition: MachineValueType.h:249
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:426
llvm::AArch64ISD::DUPLANE64
@ DUPLANE64
Definition: AArch64ISelLowering.h:176
llvm::MaskedLoadSDNode::getMask
const SDValue & getMask() const
Definition: SelectionDAGNodes.h:2676
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
isEssentiallyExtractHighSubvector
static bool isEssentiallyExtractHighSubvector(SDValue N)
Definition: AArch64ISelLowering.cpp:17087
llvm::MachineFunction::ArgRegPair::Reg
Register Reg
Definition: MachineFunction.h:437
llvm::FPOpFusion::Fast
@ Fast
Definition: TargetOptions.h:37
getCmpOperandFoldingProfit
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
Definition: AArch64ISelLowering.cpp:3419
llvm::AArch64ISD::ST2G
@ ST2G
Definition: AArch64ISelLowering.h:474
CodeGen.h
llvm::AArch64ISD::SST1_SXTW_PRED
@ SST1_SXTW_PRED
Definition: AArch64ISelLowering.h:420
llvm::AArch64Subtarget::isTargetMachO
bool isTargetMachO() const
Definition: AArch64Subtarget.h:271
llvm::AArch64ISD::FSUB_PRED
@ FSUB_PRED
Definition: AArch64ISelLowering.h:104
llvm::TLSModel::InitialExec
@ InitialExec
Definition: CodeGen.h:48
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:31
llvm::AArch64CC::NV
@ NV
Definition: AArch64BaseInfo.h:270
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::AArch64::SMEMatrixTileD
@ SMEMatrixTileD
Definition: AArch64InstrInfo.h:591
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1293
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::APInt::getSignMask
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:209
llvm::SelectionDAG::GetSplitDestVTs
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
Definition: SelectionDAG.cpp:11621
performSubsToAndsCombine
static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode, SDNode *AndNode, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex, unsigned CC)
Definition: AArch64ISelLowering.cpp:20038
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2290
isSignExtended
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:4459
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1439
llvm::AArch64TargetLowering::CCAssignFnForReturn
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC) const
Selects the correct CCAssignFn for a given CallingConvention value.
Definition: AArch64ISelLowering.cpp:6278
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:870
getVal
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
Definition: ProfileSummary.cpp:119
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2017
llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:175
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:106
llvm::AArch64TargetLowering::shouldExpandGetActiveLaneMask
bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override
Return true if the @llvm.get.active.lane.mask intrinsic should be expanded using generic code in Sele...
Definition: AArch64ISelLowering.cpp:1684
llvm::countr_zero
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:179
tryLowerToSLI
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:11996
llvm::AArch64ISD::REINTERPRET_CAST
@ REINTERPRET_CAST
Definition: AArch64ISelLowering.h:352
llvm::CombineLevel
CombineLevel
Definition: DAGCombine.h:15
llvm::AArch64ISD::FTRUNC_MERGE_PASSTHRU
@ FTRUNC_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:139
isSigned
static bool isSigned(unsigned int Opcode)
Definition: ExpandLargeDivRem.cpp:52
llvm::getNumElementsFromSVEPredPattern
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
Definition: AArch64BaseInfo.h:478
llvm::AArch64TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Definition: AArch64ISelLowering.cpp:22992
performTBISimplification
static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Simplify Addr given that the top byte of it is ignored by HW during address translation.
Definition: AArch64ISelLowering.cpp:19355
Instructions.h
Invalid
@ Invalid
Definition: AArch64ISelLowering.cpp:9958
llvm::AArch64ISD::REV16
@ REV16
Definition: AArch64ISelLowering.h:203
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
AArch64Subtarget.h
llvm::Pattern
Definition: FileCheckImpl.h:614
llvm::MVT::f128
@ f128
Definition: MachineValueType.h:60
Other
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1260
llvm::ISD::PREFETCH
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1145
llvm::GetReturnInfo
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
Definition: TargetLoweringBase.cpp:1670
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
llvm::ISD::READCYCLECOUNTER
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1112
llvm::AArch64FunctionInfo::getSRetReturnReg
unsigned getSRetReturnReg() const
Definition: AArch64MachineFunctionInfo.h:357
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::CC_AArch64_DarwinPCS_VarArg
bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
SmallVector.h
llvm::MVT::v1i8
@ v1i8
Definition: MachineValueType.h:85
Upl
@ Upl
Definition: AArch64ISelLowering.cpp:9956
llvm::MVT::nxv8i1
@ nxv8i1
Definition: MachineValueType.h:209
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:394
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1049
SetCCInfo
Helper structure to keep track of SetCC information.
Definition: AArch64ISelLowering.cpp:17112
llvm::TargetLowering::parametersInCSRMatch
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
Definition: TargetLowering.cpp:81
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:309
MachineInstrBuilder.h
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2293
performANDORCSELCombine
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:16226
llvm::ISD::isUnsignedIntSetCC
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1473
llvm::AArch64ISD::LS64_BUILD
@ LS64_BUILD
Definition: AArch64ISelLowering.h:355
llvm::ISD::MUL
@ MUL
Definition: ISDOpcodes.h:241
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
llvm::AArch64::RoundingBitsPos
const unsigned RoundingBitsPos
Definition: AArch64ISelLowering.h:505
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:199
llvm::AArch64ISD::GLD1_UXTW_MERGE_ZERO
@ GLD1_UXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:375
llvm::AArch64ISD::UADDV
@ UADDV
Definition: AArch64ISelLowering.h:250
llvm::ISD::ZERO_EXTEND_VECTOR_INREG
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:814
llvm::AArch64TargetLowering::shouldConvertFpToSat
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
Definition: AArch64ISelLowering.cpp:23078
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:118
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:56
llvm::SDNode::hasPredecessorHelper
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
Definition: SelectionDAGNodes.h:849
llvm::AArch64ISD::UMAXV_PRED
@ UMAXV_PRED
Definition: AArch64ISelLowering.h:272
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1351
llvm::TargetLowering::useLoadStackGuardNode
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
Definition: TargetLowering.h:5183
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1285
EnableCombineMGatherIntrinsics
static cl::opt< bool > EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, cl::desc("Combine extends of AArch64 masked " "gather intrinsics"), cl::init(true))
llvm::AArch64ISD::ABS_MERGE_PASSTHRU
@ ABS_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:148
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:704
selectUmullSmull
static unsigned selectUmullSmull(SDNode *&N0, SDNode *&N1, SelectionDAG &DAG, SDLoc DL, bool &IsMLA)
Definition: AArch64ISelLowering.cpp:4560
llvm::ISD::LAST_INDEXED_MODE
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1385
llvm::AArch64ISD::GLD1_SCALED_MERGE_ZERO
@ GLD1_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:374
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::ISD::ADDROFRETURNADDR
@ ADDROFRETURNADDR
ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
Definition: ISDOpcodes.h:101
llvm::ComplexDeinterleavingOperation::CAdd
@ CAdd
llvm::AArch64ISD::ST2LANEpost
@ ST2LANEpost
Definition: AArch64ISelLowering.h:468
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:127
llvm::AArch64ISD::UMAXV
@ UMAXV
Definition: AArch64ISelLowering.h:267
llvm::CallingConv::CFGuard_Check
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall funtion.
Definition: CallingConv.h:82
llvm::KnownBits::commonBits
static KnownBits commonBits(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits common to LHS and RHS.
Definition: KnownBits.h:315
llvm::MVT::nxv1i1
@ nxv1i1
Definition: MachineValueType.h:206
llvm::SelectionDAG::getRegisterMask
SDValue getRegisterMask(const uint32_t *RegMask)
Definition: SelectionDAG.cpp:2162
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::MemSDNode::getSuccessOrdering
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
Definition: SelectionDAGNodes.h:1334
llvm::LLT::getSizeInBytes
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelTypeImpl.h:169
llvm::MVT::getVT
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:571
llvm::TargetLowering::DAGCombinerInfo::CombineTo
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
Definition: DAGCombiner.cpp:982
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2473
llvm::AArch64ISD::GLD1S_MERGE_ZERO
@ GLD1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:382
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:701
llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition: TargetLowering.h:891
shift
http eax xorl edx cl sete al setne dl sall eax sall edx But that requires good bit subreg support this might be better It s an extra shift
Definition: README.txt:30
TargetTransformInfo.h
llvm::tgtok::Bit
@ Bit
Definition: TGLexer.h:50
getPredicateForVector
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
Definition: AArch64ISelLowering.cpp:23283
llvm::ISD::STRICT_FADD
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
AArch64SetCCInfo::Cmp
const SDValue * Cmp
Definition: AArch64ISelLowering.cpp:17107
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:292
llvm::ISD::PARITY
@ PARITY
Definition: ISDOpcodes.h:705
llvm::CallingConv::GHC
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
llvm::SelectionDAG::getStepVector
SDValue getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
Definition: SelectionDAG.cpp:1943
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2595
performSTNT1Combine
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:18746
getSVEContainerIRType
static ScalableVectorType * getSVEContainerIRType(FixedVectorType *VTy)
Definition: AArch64ISelLowering.cpp:14363
llvm::TargetLowering::DAGCombinerInfo::CommitTargetLoweringOpt
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
Definition: DAGCombiner.cpp:1002
llvm::EVT::isPow2VectorType
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:437
llvm::ISD::STRICT_LLROUND
@ STRICT_LLROUND
Definition: ISDOpcodes.h:431
llvm::AArch64ISD::TBZ
@ TBZ
Definition: AArch64ISelLowering.h:285
llvm::KnownBits::makeConstant
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:310
llvm::AArch64RegisterInfo::UpdateCustomCalleeSavedRegs
void UpdateCustomCalleeSavedRegs(MachineFunction &MF) const
Definition: AArch64RegisterInfo.cpp:177
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::TargetLoweringBase::AtomicExpansionKind::None
@ None
llvm::MVT::changeVectorElementType
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: MachineValueType.h:500
llvm::ISD::UMIN
@ UMIN
Definition: ISDOpcodes.h:662
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
llvm::AArch64ISD::FCCMP
@ FCCMP
Definition: AArch64ISelLowering.h:163
llvm::MVT::Untyped
@ Untyped
Definition: MachineValueType.h:286
MachineMemOperand.h
llvm::AArch64TargetLowering::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
Definition: AArch64ISelLowering.cpp:13465
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:637
llvm::TargetOptions::UnsafeFPMath
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
Definition: TargetOptions.h:163
llvm::SDNode::setCFIType
void setCFIType(uint32_t Type)
Definition: SelectionDAGNodes.h:979
RegName
#define RegName(no)
llvm::AArch64CC::CondCode
CondCode
Definition: AArch64BaseInfo.h:254
llvm::CC_AArch64_Win64_VarArg
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::MaskedScatterSDNode
This class is used to represent an MSCATTER node.
Definition: SelectionDAGNodes.h:2870
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:930
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:641
llvm::MachineBasicBlock::transferSuccessorsAndUpdatePHIs
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
Definition: MachineBasicBlock.cpp:911
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1137
llvm::ShuffleVectorSDNode::isSplatMask
static bool isSplatMask(const int *Mask, EVT VT)
Definition: SelectionDAG.cpp:12068
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:240
tryCombineLongOpWithDup
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17780
llvm::ISD::STRICT_FREM
@ STRICT_FREM
Definition: ISDOpcodes.h:404
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:671
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >
DerivedTypes.h
llvm::CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
Preserve X0-X13, X19-X29, SP, Z0-Z31, P0-P15.
Definition: CallingConv.h:235
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:292
llvm::AArch64TargetLowering::shouldSinkOperands
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
Definition: AArch64ISelLowering.cpp:13869
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1442
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:90
llvm::TargetLoweringBase::Enabled
@ Enabled
Definition: TargetLowering.h:532
emitConjunctionRec
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
Definition: AArch64ISelLowering.cpp:3293
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:256
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:289
isREVMask
static bool isREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
Definition: AArch64ISelLowering.cpp:10821
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:47
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::AArch64_AM::encodeAdvSIMDModImmType9
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
Definition: AArch64AddressingModes.h:580
llvm::TargetLoweringBase::shouldReduceLoadWidth
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
Definition: TargetLowering.h:1638
llvm::ScalableVectorType::get
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:707
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1485
llvm::ConstantFPSDNode::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition: SelectionDAGNodes.h:1650
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:910
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:465
ReplaceReductionResults
static void ReplaceReductionResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp)
Definition: AArch64ISelLowering.cpp:22080
llvm::AArch64II::MO_G2
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
Definition: AArch64BaseInfo.h:719
llvm::omp::RTLDependInfoFields::Flags
@ Flags
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::MachineFrameInfo::getStackProtectorIndex
int getStackProtectorIndex() const
Return the index for the stack protector object.
Definition: MachineFrameInfo.h:358
OP_VEXT2
@ OP_VEXT2
Definition: ARMISelLowering.cpp:8318
llvm::CodeGenOpt::Level
Level
Code generation optimization level.
Definition: CodeGen.h:57
llvm::CallingConv::Tail
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
llvm::ISD::STRICT_FEXP2
@ STRICT_FEXP2
Definition: ISDOpcodes.h:417
performAddCombineForShiftedOperands
static SDValue performAddCombineForShiftedOperands(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17682
llvm::AArch64II::MO_COFFSTUB
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: AArch64BaseInfo.h:737
getPredicateForFixedLengthVector
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
Definition: AArch64ISelLowering.cpp:23231
llvm::ISD::GET_ROUNDING
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:862
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::ISD::VAARG
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1076
llvm::AArch64FunctionInfo::setIsSplitCSR
void setIsSplitCSR(bool s)
Definition: AArch64MachineFunctionInfo.h:246
llvm::KnownBits::getBitWidth
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
performBRCONDCombine
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:20163
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::SelectionDAG::getExternalSymbol
SDValue getExternalSymbol(const char *Sym, EVT VT)
Definition: SelectionDAG.cpp:1898
llvm::AArch64ISD::BSWAP_MERGE_PASSTHRU
@ BSWAP_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:343
llvm::isMask_64
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:274
AArch64SetCCInfo::CC
AArch64CC::CondCode CC
Definition: AArch64ISelLowering.cpp:17108
llvm::AArch64ISD::INSR
@ INSR
Definition: AArch64ISelLowering.h:337
llvm::AArch64ISD::LD2LANEpost
@ LD2LANEpost
Definition: AArch64ISelLowering.h:465
llvm::AArch64::SVEMaxBitsPerVector
static constexpr unsigned SVEMaxBitsPerVector
Definition: AArch64BaseInfo.h:836
llvm::TargetLoweringBase::MaxLoadsPerMemcmp
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
Definition: TargetLowering.h:3472
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:372
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::EVT::bitsGE
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:264
llvm::ISD::STRICT_FPOWI
@ STRICT_FPOWI
Definition: ISDOpcodes.h:413
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:58
performVectorCompareAndMaskUnaryOpCombine
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15833
llvm::AArch64ISD::LASTA
@ LASTA
Definition: AArch64ISelLowering.h:325
foldOverflowCheck
static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd)
Definition: AArch64ISelLowering.cpp:17489
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::AArch64TargetLowering::preferIncOfAddToSubOfNot
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
Definition: AArch64ISelLowering.cpp:23073
tryFormConcatFromShuffle
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:11001
llvm::cl::desc
Definition: CommandLine.h:411
llvm::ISD::ATOMIC_LOAD_XOR
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1192
llvm::EVT::is128BitVector
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:185
llvm::MVT::fp_valuetypes
static auto fp_valuetypes()
Definition: MachineValueType.h:1531
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1500
llvm::AArch64::SMEMatrixType
SMEMatrixType
Definition: AArch64InstrInfo.h:585
llvm::M1
unsigned M1(unsigned Val)
Definition: VE.h:468
llvm::AArch64ISD::ST4post
@ ST4post
Definition: AArch64ISelLowering.h:453
llvm::AArch64ISD::ST1x3post
@ ST1x3post
Definition: AArch64ISelLowering.h:458
llvm::AArch64ISD::WrapperLarge
@ WrapperLarge
Definition: AArch64ISelLowering.h:52
llvm::ISD::VECREDUCE_SMIN
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1290
llvm::AArch64Subtarget
Definition: AArch64Subtarget.h:38
llvm::AtomicRMWInst::UMax
@ UMax
*p = old &gt;u v ? old : v (unsigned comparison)
Definition: Instructions.h:750
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
llvm::MaskedLoadSDNode::getOffset
const SDValue & getOffset() const
Definition: SelectionDAGNodes.h:2675
raw_ostream.h
llvm::AArch64TargetLowering::isLegalAddressingMode
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: AArch64ISelLowering.cpp:14859
llvm::AArch64TargetLowering
Definition: AArch64ISelLowering.h:515
llvm::AArch64TargetLowering::getPointerTy
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: AArch64ISelLowering.h:538
llvm::AArch64ISD::MOPS_MEMCOPY
@ MOPS_MEMCOPY
Definition: AArch64ISelLowering.h:487
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:88
llvm::ISD::VECREDUCE_UMIN
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1292
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:983
llvm::MVT::nxv2i8
@ nxv2i8
Definition: MachineValueType.h:215
llvm::AArch64ISD::LD4DUPpost
@ LD4DUPpost
Definition: AArch64ISelLowering.h:463
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::MaskedGatherScatterSDNode::getIndex
const SDValue & getIndex() const
Definition: SelectionDAGNodes.h:2833
llvm::TargetLoweringBase::finalizeLowering
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
Definition: TargetLoweringBase.cpp:2244
llvm::MVT::v8f32
@ v8f32
Definition: MachineValueType.h:174
MachineFunction.h
llvm::AArch64ISD::PTEST_ANY
@ PTEST_ANY
Definition: AArch64ISelLowering.h:339
isUZPMask
static bool isUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition: AArch64ISelLowering.cpp:10862
performSignExtendInRegCombine
static SDValue performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:21190
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:924
llvm::AArch64ISD::BIT
@ BIT
Definition: AArch64ISelLowering.h:280
ReconstructTruncateFromBuildVector
static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:10655
performBuildVectorCombine
static SDValue performBuildVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17564
llvm::TargetLoweringBase::AtomicExpansionKind::CmpXChg
@ CmpXChg
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::SelectionDAG::getSplatValue
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
Definition: SelectionDAG.cpp:2863
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:297
llvm::AArch64ISD::BRCOND
@ BRCOND
Definition: AArch64ISelLowering.h:81
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:98
llvm::tgtok::TrueVal
@ TrueVal
Definition: TGLexer.h:62
llvm::AArch64ISD::SUNPKHI
@ SUNPKHI
Definition: AArch64ISelLowering.h:318
llvm::EVT::getDoubleNumVectorElementsVT
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:430
parsePredicateConstraint
static PredicateConstraint parsePredicateConstraint(StringRef Constraint)
Definition: AArch64ISelLowering.cpp:9961
Value.h
llvm::AArch64TargetLowering::insertSSPDeclarations
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: AArch64ISelLowering.cpp:22928
llvm::AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU
@ UINT_TO_FP_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:142
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1332
llvm::AArch64ISD::FNEARBYINT_MERGE_PASSTHRU
@ FNEARBYINT_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:132
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:523
changeFPCCToAArch64CC
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
Definition: AArch64ISelLowering.cpp:2911
hasOneUse
static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, MachineDominatorTree &MDT, LiveIntervals &LIS)
Definition: WebAssemblyRegStackify.cpp:283
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:132
llvm::AArch64ISD::GLDFF1_SCALED_MERGE_ZERO
@ GLDFF1_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:392
llvm::SelectionDAG::getMaskedScatter
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
Definition: SelectionDAG.cpp:9051
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:216
llvm::AArch64TargetLowering::computeKnownBitsForTargetNode
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition: AArch64ISelLowering.cpp:2144
llvm::AArch64ISD::SMINV
@ SMINV
Definition: AArch64ISelLowering.h:264
llvm::ISD::STACKSAVE
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1052
llvm::TargetLoweringBase::MaxStoresPerMemmove
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
Definition: TargetLowering.h:3486
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:918
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:44
isINSMask
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
Definition: AArch64ISelLowering.cpp:10942
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:8135
llvm::EVT::isFixedLengthVector
bool isFixedLengthVector() const
Definition: ValueTypes.h:164
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:57
llvm::TargetLoweringBase::getAsmOperandValueType
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Definition: TargetLowering.h:1491
llvm::AArch64_AM::isAdvSIMDModImmType11
static bool isAdvSIMDModImmType11(uint64_t Imm)
Definition: AArch64AddressingModes.h:656
llvm::AArch64TargetLowering::EmitZero
MachineBasicBlock * EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const
Definition: AArch64ISelLowering.cpp:2754
llvm::AArch64ISD::FADD_PRED
@ FADD_PRED
Definition: AArch64ISelLowering.h:96
llvm::AArch64II::MO_GOT
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
Definition: AArch64BaseInfo.h:742
llvm::AArch64ISD::ST2post
@ ST2post
Definition: AArch64ISelLowering.h:451
llvm::codegen::getCodeModel
CodeModel::Model getCodeModel()
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1062
llvm::TargetLoweringBase::setPrefLoopAlignment
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
Definition: TargetLowering.h:2532
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::ISD::ROTR
@ ROTR
Definition: ISDOpcodes.h:695
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition: MachineMemOperand.h:219
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:513
llvm::AArch64Subtarget::Falkor
@ Falkor
Definition: AArch64Subtarget.h:75
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:469
llvm::ISD::AVGFLOORS
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:643
createGPRPairNode
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
Definition: AArch64ISelLowering.cpp:22138
Debug.h
llvm::EVT::bitsEq
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:228
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:670
llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:139
performORCombine
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
Definition: AArch64ISelLowering.cpp:16294
llvm::ISD::isConstantSplatVectorAllOnes
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
Definition: SelectionDAG.cpp:171
llvm::SDNode::hasAnyUseOfValue
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
Definition: SelectionDAG.cpp:11217
llvm::ISD::SET_ROUNDING
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:867
llvm::RegState::ImplicitDefine
@ ImplicitDefine
Definition: MachineInstrBuilder.h:63
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1161
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:482
llvm::ComplexDeinterleavingOperation::CMulPartial
@ CMulPartial
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:311
llvm::AArch64ISD::LD1S_MERGE_ZERO
@ LD1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:359
llvm::AArch64ISD::STP
@ STP
Definition: AArch64ISelLowering.h:480
llvm::AArch64Subtarget::isTargetAndroid
bool isTargetAndroid() const
Definition: AArch64Subtarget.h:265
llvm::SMEAttrs::ZA_Preserved
@ ZA_Preserved
Definition: AArch64SMEAttributes.h:36
llvm::Type::getFloatTy
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:228
performSetccMergeZeroCombine
static SDValue performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: AArch64ISelLowering.cpp:20525
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1361
llvm::CCValAssign::needsCustom
bool needsCustom() const
Definition: CallingConvLower.h:124
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:606
llvm::SrcOp
Definition: MachineIRBuilder.h:128
llvm::AArch64ISD::FMIN_PRED
@ FMIN_PRED
Definition: AArch64ISelLowering.h:101
llvm::AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Definition: AArch64ISelLowering.cpp:22884
llvm::TargetLowering::getConstraintType
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Definition: TargetLowering.cpp:5170
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:703
canEmitConjunction
static bool canEmitConjunction(const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
Definition: AArch64ISelLowering.cpp:3231
llvm::AArch64FunctionInfo::setTailCallReservedStack
void setTailCallReservedStack(unsigned bytes)
Definition: AArch64MachineFunctionInfo.h:220
performAddUADDVCombine
static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17234
llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:1167
llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
llvm::TargetLoweringBase::LibCall
@ LibCall
Definition: TargetLowering.h:200
llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
llvm::AArch64FunctionInfo::setVarArgsGPRSize
void setVarArgsGPRSize(unsigned Size)
Definition: AArch64MachineFunctionInfo.h:349
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7882
llvm::AArch64ISD::GLDNT1_INDEX_MERGE_ZERO
@ GLDNT1_INDEX_MERGE_ZERO
Definition: AArch64ISelLowering.h:410
llvm::AArch64Subtarget::getPrefFunctionAlignment
Align getPrefFunctionAlignment() const
Definition: AArch64Subtarget.h:244
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::MaskedGatherScatterSDNode::isIndexSigned
bool isIndexSigned() const
Definition: SelectionDAGNodes.h:2826
llvm::AArch64_AM::encodeAdvSIMDModImmType7
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
Definition: AArch64AddressingModes.h:549
llvm::AArch64II::MO_PAGE
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
Definition: AArch64BaseInfo.h:706
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::ISD::EXTRACT_ELEMENT
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:163
llvm::AArch64ISD::DUPLANE32
@ DUPLANE32
Definition: AArch64ISelLowering.h:175
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
llvm::Function::size
size_t size() const
Definition: Function.h:756
llvm::ISD::MSCATTER
@ MSCATTER
Definition: ISDOpcodes.h:1224
llvm::AArch64ISD::FDIV_PRED
@ FDIV_PRED
Definition: AArch64ISelLowering.h:97
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::ISD::isBuildVectorAllOnes
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
Definition: SelectionDAG.cpp:262
isLegalArithImmed
static bool isLegalArithImmed(uint64_t C)
Definition: AArch64ISelLowering.cpp:3035
llvm::ISD::isBuildVectorAllZeros
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
Definition: SelectionDAG.cpp:266
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:68
llvm::TLSModel::LocalExec
@ LocalExec
Definition: CodeGen.h:49
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition: MachineValueType.h:1246
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
llvm::AArch64ISD::FROUND_MERGE_PASSTHRU
@ FROUND_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:136
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809
llvm::AtomicRMWInst::Max
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:746
llvm::DataLayout::getTypeAllocSize
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:500
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:722
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1759
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1925
llvm::AArch64ISD::NVCAST
@ NVCAST
Natural vector cast.
Definition: AArch64ISelLowering.h:303
llvm::MVT::integer_fixedlen_vector_valuetypes
static auto integer_fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1554
LowerBRCOND
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:5818
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:393
llvm::LLT
Definition: LowLevelTypeImpl.h:39
llvm::AArch64ISD::SUBS
@ SUBS
Definition: AArch64ISelLowering.h:155
llvm::LLT::fixed_vector
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelTypeImpl.h:76